From 6d6f4e33216e1c652c6c8e28b4118e33a81ba662 Mon Sep 17 00:00:00 2001
From: Ed Seidl
Date: Wed, 6 Aug 2025 14:36:17 -0700
Subject: [PATCH 01/15] [thrift-remodel] Reduce use of `parquet::format` in the public API (#8048)

**Note: this targets a feature branch, not main**

# Which issue does this PR close?

- Part of #5854.
- Related to https://github.com/apache/arrow-rs/issues/6129

# Rationale for this change

This is the first step towards a rework of Parquet metadata handling. This PR
attempts to remove as many references as possible to the `parquet::format`
module in the public API. This is done by creating new enums and structs that
mirror their `format` counterparts and using them in publicly exposed
structures like `FileMetaData`.

# What changes are included in this PR?

# Are these changes tested?

Current tests should suffice for now. More thorough tests will be added as
needed.

# Are there any user-facing changes?

Yes, public-facing interfaces should no longer expose `format`.
---
 parquet/benches/metadata.rs                 |  54 +-
 parquet/src/arrow/arrow_reader/mod.rs       |   8 +-
 parquet/src/arrow/arrow_reader/selection.rs |   5 +-
 parquet/src/arrow/async_reader/mod.rs       |   8 +-
 parquet/src/arrow/async_writer/mod.rs       |  13 +-
 parquet/src/arrow/schema/mod.rs             |  12 +-
 parquet/src/arrow/schema/primitive.rs       |  14 +-
 parquet/src/basic.rs                        | 845 ++++++++++++--------
 parquet/src/bin/parquet-index.rs            |   3 +-
 parquet/src/bloom_filter/mod.rs             |  72 +-
 parquet/src/column/writer/mod.rs            | 201 +++--
 parquet/src/file/metadata/memory.rs         |   9 +-
 parquet/src/file/metadata/mod.rs            | 313 +++++++-
 parquet/src/file/metadata/reader.rs         |  36 +-
 parquet/src/file/metadata/writer.rs         | 100 ++-
 parquet/src/file/page_encoding_stats.rs     |  15 +-
 parquet/src/file/page_index/index.rs        |  28 +-
 parquet/src/file/page_index/index_reader.rs |   9 +-
 parquet/src/file/page_index/offset_index.rs |  53 +-
 parquet/src/file/properties.rs              |   3 +-
 parquet/src/file/serialized_reader.rs       |   7 +-
 parquet/src/file/statistics.rs              |  12 +-
 parquet/src/file/writer.rs                  |  67 +-
 parquet/src/schema/parser.rs                |  14 +-
 parquet/src/schema/printer.rs               |  12 +-
 parquet/src/schema/types.rs                 |  22 +-
 parquet/src/thrift.rs                       |  21 +-
 27 files changed, 1335 insertions(+), 621 deletions(-)

diff --git a/parquet/benches/metadata.rs b/parquet/benches/metadata.rs
index 949e0d98ea39..bb2eca0a75c1 100644
--- a/parquet/benches/metadata.rs
+++ b/parquet/benches/metadata.rs
@@ -151,6 +151,35 @@ fn get_footer_bytes(data: Bytes) -> Bytes {
     data.slice(meta_start..meta_end)
 }
 
+#[cfg(feature = "arrow")]
+fn rewrite_file(bytes: Bytes) -> (Bytes, FileMetaData) {
+    use arrow::array::RecordBatchReader;
+    use parquet::arrow::{arrow_reader::ParquetRecordBatchReaderBuilder, ArrowWriter};
+    use parquet::file::properties::{EnabledStatistics, WriterProperties};
+
+    let parquet_reader = ParquetRecordBatchReaderBuilder::try_new(bytes)
+        .expect("parquet open")
+        .build()
+        .expect("parquet open");
+    let writer_properties = WriterProperties::builder()
+        .set_statistics_enabled(EnabledStatistics::Page)
+        .build();
+    let mut output = Vec::new();
+    let mut parquet_writer = ArrowWriter::try_new(
+        &mut output,
+        parquet_reader.schema(),
+        Some(writer_properties),
+    )
+    .expect("create arrow writer");
+
+    for maybe_batch in parquet_reader {
+        let batch = maybe_batch.expect("reading batch");
+        parquet_writer.write(&batch).expect("writing data");
+    }
+    let file_meta = parquet_writer.close().expect("finalizing file");
+    (output.into(), file_meta)
+}
+
 fn criterion_benchmark(c: &mut Criterion) {
     // Read file into memory to isolate filesystem
performance let file = "../parquet-testing/data/alltypes_tiny_pages.parquet"; @@ -168,7 +197,7 @@ fn criterion_benchmark(c: &mut Criterion) { }) }); - let meta_data = get_footer_bytes(data); + let meta_data = get_footer_bytes(data.clone()); c.bench_function("decode file metadata", |b| { b.iter(|| { parquet::thrift::bench_file_metadata(&meta_data); @@ -181,6 +210,29 @@ fn criterion_benchmark(c: &mut Criterion) { parquet::thrift::bench_file_metadata(&buf); }) }); + + // rewrite file with page statistics. then read page headers. + #[cfg(feature = "arrow")] + let (file_bytes, metadata) = rewrite_file(data.clone()); + #[cfg(feature = "arrow")] + c.bench_function("page headers", |b| { + b.iter(|| { + metadata.row_groups.iter().for_each(|rg| { + rg.columns.iter().for_each(|col| { + if let Some(col_meta) = &col.meta_data { + if let Some(dict_offset) = col_meta.dictionary_page_offset { + parquet::thrift::bench_page_header( + &file_bytes.slice(dict_offset as usize..), + ); + } + parquet::thrift::bench_page_header( + &file_bytes.slice(col_meta.data_page_offset as usize..), + ); + } + }); + }); + }) + }); } criterion_group!(benches, criterion_benchmark); diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index d4a3e11e2c46..a934a93ef22e 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -30,6 +30,7 @@ pub use crate::arrow::array_reader::RowGroups; use crate::arrow::array_reader::{ArrayReader, ArrayReaderBuilder}; use crate::arrow::schema::{parquet_to_arrow_schema_and_fields, ParquetField}; use crate::arrow::{parquet_to_arrow_field_levels, FieldLevels, ProjectionMask}; +use crate::basic::{BloomFilterAlgorithm, BloomFilterCompression, BloomFilterHash}; use crate::bloom_filter::{ chunk_read_bloom_filter_header_and_offset, Sbbf, SBBF_HEADER_SIZE_ESTIMATE, }; @@ -39,7 +40,6 @@ use crate::encryption::decrypt::FileDecryptionProperties; use crate::errors::{ParquetError, Result}; use crate::file::metadata::{ParquetMetaData, ParquetMetaDataReader}; use crate::file::reader::{ChunkReader, SerializedPageReader}; -use crate::format::{BloomFilterAlgorithm, BloomFilterCompression, BloomFilterHash}; use crate::schema::types::SchemaDescriptor; pub(crate) use read_plan::{ReadPlan, ReadPlanBuilder}; @@ -737,17 +737,17 @@ impl ParquetRecordBatchReaderBuilder { chunk_read_bloom_filter_header_and_offset(offset, buffer.clone())?; match header.algorithm { - BloomFilterAlgorithm::BLOCK(_) => { + BloomFilterAlgorithm::BLOCK => { // this match exists to future proof the singleton algorithm enum } } match header.compression { - BloomFilterCompression::UNCOMPRESSED(_) => { + BloomFilterCompression::UNCOMPRESSED => { // this match exists to future proof the singleton compression enum } } match header.hash { - BloomFilterHash::XXHASH(_) => { + BloomFilterHash::XXHASH => { // this match exists to future proof the singleton hash enum } } diff --git a/parquet/src/arrow/arrow_reader/selection.rs b/parquet/src/arrow/arrow_reader/selection.rs index c53d47be2e56..d515adc13e07 100644 --- a/parquet/src/arrow/arrow_reader/selection.rs +++ b/parquet/src/arrow/arrow_reader/selection.rs @@ -21,6 +21,8 @@ use std::cmp::Ordering; use std::collections::VecDeque; use std::ops::Range; +use crate::file::page_index::offset_index::PageLocation; + /// [`RowSelection`] is a collection of [`RowSelector`] used to skip rows when /// scanning a parquet file #[derive(Debug, Clone, Copy, Eq, PartialEq)] @@ -162,7 +164,7 @@ impl RowSelection { /// Note: this method does not 
make any effort to combine consecutive ranges, nor coalesce /// ranges that are close together. This is instead delegated to the IO subsystem to optimise, /// e.g. [`ObjectStore::get_ranges`](object_store::ObjectStore::get_ranges) - pub fn scan_ranges(&self, page_locations: &[crate::format::PageLocation]) -> Vec> { + pub fn scan_ranges(&self, page_locations: &[PageLocation]) -> Vec> { let mut ranges: Vec> = vec![]; let mut row_offset = 0; @@ -640,7 +642,6 @@ fn union_row_selections(left: &[RowSelector], right: &[RowSelector]) -> RowSelec #[cfg(test)] mod tests { use super::*; - use crate::format::PageLocation; use rand::{rng, Rng}; #[test] diff --git a/parquet/src/arrow/async_reader/mod.rs b/parquet/src/arrow/async_reader/mod.rs index 611d6999e07e..d738d85fb6d2 100644 --- a/parquet/src/arrow/async_reader/mod.rs +++ b/parquet/src/arrow/async_reader/mod.rs @@ -45,6 +45,7 @@ use crate::arrow::arrow_reader::{ }; use crate::arrow::ProjectionMask; +use crate::basic::{BloomFilterAlgorithm, BloomFilterCompression, BloomFilterHash}; use crate::bloom_filter::{ chunk_read_bloom_filter_header_and_offset, Sbbf, SBBF_HEADER_SIZE_ESTIMATE, }; @@ -53,7 +54,6 @@ use crate::errors::{ParquetError, Result}; use crate::file::metadata::{ParquetMetaData, ParquetMetaDataReader}; use crate::file::page_index::offset_index::OffsetIndexMetaData; use crate::file::reader::{ChunkReader, Length, SerializedPageReader}; -use crate::format::{BloomFilterAlgorithm, BloomFilterCompression, BloomFilterHash}; mod metadata; pub use metadata::*; @@ -446,17 +446,17 @@ impl ParquetRecordBatchStreamBuilder { chunk_read_bloom_filter_header_and_offset(offset, buffer.clone())?; match header.algorithm { - BloomFilterAlgorithm::BLOCK(_) => { + BloomFilterAlgorithm::BLOCK => { // this match exists to future proof the singleton algorithm enum } } match header.compression { - BloomFilterCompression::UNCOMPRESSED(_) => { + BloomFilterCompression::UNCOMPRESSED => { // this match exists to future proof the singleton compression enum } } match header.hash { - BloomFilterHash::XXHASH(_) => { + BloomFilterHash::XXHASH => { // this match exists to future proof the singleton hash enum } } diff --git a/parquet/src/arrow/async_writer/mod.rs b/parquet/src/arrow/async_writer/mod.rs index faec427907a7..4e06223a5e13 100644 --- a/parquet/src/arrow/async_writer/mod.rs +++ b/parquet/src/arrow/async_writer/mod.rs @@ -61,11 +61,12 @@ mod store; pub use store::*; use crate::{ - arrow::arrow_writer::ArrowWriterOptions, - arrow::ArrowWriter, + arrow::{arrow_writer::ArrowWriterOptions, ArrowWriter}, errors::{ParquetError, Result}, - file::{metadata::RowGroupMetaData, properties::WriterProperties}, - format::{FileMetaData, KeyValue}, + file::{ + metadata::{KeyValue, RowGroupMetaData}, + properties::WriterProperties, + }, }; use arrow_array::RecordBatch; use arrow_schema::SchemaRef; @@ -245,7 +246,7 @@ impl AsyncArrowWriter { /// Unlike [`Self::close`] this does not consume self /// /// Attempting to write after calling finish will result in an error - pub async fn finish(&mut self) -> Result { + pub async fn finish(&mut self) -> Result { let metadata = self.sync_writer.finish()?; // Force to flush the remaining data. @@ -258,7 +259,7 @@ impl AsyncArrowWriter { /// Close and finalize the writer. /// /// All the data in the inner buffer will be force flushed. 
- pub async fn close(mut self) -> Result { + pub async fn close(mut self) -> Result { self.finish().await } diff --git a/parquet/src/arrow/schema/mod.rs b/parquet/src/arrow/schema/mod.rs index 5b079b66276a..e5d7f8410b3c 100644 --- a/parquet/src/arrow/schema/mod.rs +++ b/parquet/src/arrow/schema/mod.rs @@ -532,9 +532,9 @@ fn arrow_to_parquet_type(field: &Field, coerce_types: bool) -> Result { is_adjusted_to_u_t_c: matches!(tz, Some(z) if !z.as_ref().is_empty()), unit: match time_unit { TimeUnit::Second => unreachable!(), - TimeUnit::Millisecond => ParquetTimeUnit::MILLIS(Default::default()), - TimeUnit::Microsecond => ParquetTimeUnit::MICROS(Default::default()), - TimeUnit::Nanosecond => ParquetTimeUnit::NANOS(Default::default()), + TimeUnit::Millisecond => ParquetTimeUnit::MILLIS, + TimeUnit::Microsecond => ParquetTimeUnit::MICROS, + TimeUnit::Nanosecond => ParquetTimeUnit::NANOS, }, })) .with_repetition(repetition) @@ -571,7 +571,7 @@ fn arrow_to_parquet_type(field: &Field, coerce_types: bool) -> Result { .with_logical_type(Some(LogicalType::Time { is_adjusted_to_u_t_c: field.metadata().contains_key("adjusted_to_utc"), unit: match unit { - TimeUnit::Millisecond => ParquetTimeUnit::MILLIS(Default::default()), + TimeUnit::Millisecond => ParquetTimeUnit::MILLIS, u => unreachable!("Invalid unit for Time32: {:?}", u), }, })) @@ -582,8 +582,8 @@ fn arrow_to_parquet_type(field: &Field, coerce_types: bool) -> Result { .with_logical_type(Some(LogicalType::Time { is_adjusted_to_u_t_c: field.metadata().contains_key("adjusted_to_utc"), unit: match unit { - TimeUnit::Microsecond => ParquetTimeUnit::MICROS(Default::default()), - TimeUnit::Nanosecond => ParquetTimeUnit::NANOS(Default::default()), + TimeUnit::Microsecond => ParquetTimeUnit::MICROS, + TimeUnit::Nanosecond => ParquetTimeUnit::NANOS, u => unreachable!("Invalid unit for Time64: {:?}", u), }, })) diff --git a/parquet/src/arrow/schema/primitive.rs b/parquet/src/arrow/schema/primitive.rs index 1b3ab7d45c51..0992a05cd2a4 100644 --- a/parquet/src/arrow/schema/primitive.rs +++ b/parquet/src/arrow/schema/primitive.rs @@ -186,7 +186,7 @@ fn from_int32(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result decimal_128_type(scale, precision), (Some(LogicalType::Date), _) => Ok(DataType::Date32), (Some(LogicalType::Time { unit, .. }), _) => match unit { - ParquetTimeUnit::MILLIS(_) => Ok(DataType::Time32(TimeUnit::Millisecond)), + ParquetTimeUnit::MILLIS => Ok(DataType::Time32(TimeUnit::Millisecond)), _ => Err(arrow_err!( "Cannot create INT32 physical type from {:?}", unit @@ -225,11 +225,11 @@ fn from_int64(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result Ok(DataType::UInt64), }, (Some(LogicalType::Time { unit, .. 
}), _) => match unit { - ParquetTimeUnit::MILLIS(_) => { + ParquetTimeUnit::MILLIS => { Err(arrow_err!("Cannot create INT64 from MILLIS time unit",)) } - ParquetTimeUnit::MICROS(_) => Ok(DataType::Time64(TimeUnit::Microsecond)), - ParquetTimeUnit::NANOS(_) => Ok(DataType::Time64(TimeUnit::Nanosecond)), + ParquetTimeUnit::MICROS => Ok(DataType::Time64(TimeUnit::Microsecond)), + ParquetTimeUnit::NANOS => Ok(DataType::Time64(TimeUnit::Nanosecond)), }, ( Some(LogicalType::Timestamp { @@ -239,9 +239,9 @@ fn from_int64(info: &BasicTypeInfo, scale: i32, precision: i32) -> Result Ok(DataType::Timestamp( match unit { - ParquetTimeUnit::MILLIS(_) => TimeUnit::Millisecond, - ParquetTimeUnit::MICROS(_) => TimeUnit::Microsecond, - ParquetTimeUnit::NANOS(_) => TimeUnit::Nanosecond, + ParquetTimeUnit::MILLIS => TimeUnit::Millisecond, + ParquetTimeUnit::MICROS => TimeUnit::Microsecond, + ParquetTimeUnit::NANOS => TimeUnit::Nanosecond, }, if is_adjusted_to_u_t_c { Some("UTC".into()) diff --git a/parquet/src/basic.rs b/parquet/src/basic.rs index c1e301136d0e..de53c57ce53a 100644 --- a/parquet/src/basic.rs +++ b/parquet/src/basic.rs @@ -15,28 +15,23 @@ // specific language governing permissions and limitations // under the License. -//! Contains Rust mappings for Thrift definition. -//! Refer to [`parquet.thrift`](https://github.com/apache/parquet-format/blob/master/src/main/thrift/parquet.thrift) file to see raw definitions. +//! Contains Rust mappings for Thrift definition. This module contains only mappings for thrift +//! enums and unions. Thrift structs are handled elsewhere. +//! Refer to [`parquet.thrift`](https://github.com/apache/parquet-format/blob/master/src/main/thrift/parquet.thrift) +//! file to see raw definitions. use std::str::FromStr; use std::{fmt, str}; pub use crate::compression::{BrotliLevel, GzipLevel, ZstdLevel}; -use crate::format as parquet; use crate::errors::{ParquetError, Result}; -// Re-export crate::format types used in this module -pub use crate::format::{ - BsonType, DateType, DecimalType, EnumType, IntType, JsonType, ListType, MapType, NullType, - StringType, TimeType, TimeUnit, TimestampType, UUIDType, -}; - // ---------------------------------------------------------------------- // Types from the Thrift definition // ---------------------------------------------------------------------- -// Mirrors `parquet::Type` +// Mirrors thrift enum `crate::format::Type` /// Types supported by Parquet. /// @@ -66,7 +61,7 @@ pub enum Type { } // ---------------------------------------------------------------------- -// Mirrors `parquet::ConvertedType` +// Mirrors thrift enum `crate::format::ConvertedType` /// Common types (converted types) used by frameworks when using Parquet. /// @@ -171,7 +166,21 @@ pub enum ConvertedType { } // ---------------------------------------------------------------------- -// Mirrors `parquet::LogicalType` +// Mirrors thrift union `crate::format::TimeUnit` + +/// Time unit for `Time` and `Timestamp` logical types. +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum TimeUnit { + /// Milliseconds. + MILLIS, + /// Microseconds. + MICROS, + /// Nanoseconds. + NANOS, +} + +// ---------------------------------------------------------------------- +// Mirrors thrift union `crate::format::LogicalType` /// Logical types used by version 2.4.0+ of the Parquet format. 
 ///
@@ -237,7 +246,7 @@ pub enum LogicalType
 }
 
 // ----------------------------------------------------------------------
-// Mirrors `parquet::FieldRepetitionType`
+// Mirrors thrift enum `crate::format::FieldRepetitionType`
 
 /// Representation of field types in schema.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -252,7 +261,7 @@ pub enum Repetition {
 }
 
 // ----------------------------------------------------------------------
-// Mirrors `parquet::Encoding`
+// Mirrors thrift enum `crate::format::Encoding`
 
 /// Encodings supported by Parquet.
 ///
@@ -368,7 +377,7 @@ impl FromStr for Encoding {
 }
 
 // ----------------------------------------------------------------------
-// Mirrors `parquet::CompressionCodec`
+// Mirrors thrift enum `crate::format::CompressionCodec`
 
 /// Supported block compression algorithms.
 ///
@@ -497,7 +506,7 @@ impl FromStr for Compression {
 }
 
 // ----------------------------------------------------------------------
-/// Mirrors [parquet::PageType]
+/// Mirrors thrift enum `crate::format::PageType`
 ///
 /// Available data pages for Parquet file format.
 /// Note that some of the page types may not be supported.
@@ -515,7 +524,54 @@ pub enum PageType {
 }
 
 // ----------------------------------------------------------------------
-// Mirrors `parquet::ColumnOrder`
+// Mirrors thrift enum `crate::format::BoundaryOrder`
+
+/// Enum to annotate whether lists of min/max elements inside ColumnIndex
+/// are ordered and, if so, in which direction.
+#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
+pub enum BoundaryOrder {
+    /// Min/max stats are unordered.
+    UNORDERED,
+    /// Min/max stats are ordered in an ascending fashion.
+    ASCENDING,
+    /// Min/max stats are ordered in a descending fashion.
+    DESCENDING,
+}
+
+// ----------------------------------------------------------------------
+// Mirrors thrift union `crate::format::BloomFilterAlgorithm`
+
+/// The algorithm used in the Bloom filter.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum BloomFilterAlgorithm {
+    /// Block-based Bloom filter.
+    BLOCK,
+}
+
+// ----------------------------------------------------------------------
+// Mirrors thrift union `crate::format::BloomFilterHash`
+
+/// The hash function used in the Bloom filter. This function takes the hash of a column value
+/// using plain encoding.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum BloomFilterHash {
+    /// xxHash is an extremely fast non-cryptographic hash algorithm. It uses the 64-bit
+    /// version of xxHash.
+    XXHASH,
+}
+
+// ----------------------------------------------------------------------
+// Mirrors thrift union `crate::format::BloomFilterCompression`
+
+/// The compression used in the Bloom filter.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum BloomFilterCompression {
+    /// No compression is used.
+    UNCOMPRESSED,
+}
+
+// ----------------------------------------------------------------------
+// Mirrors thrift union `crate::format::ColumnOrder`
 
 /// Sort order for page and column statistics.
 ///
@@ -660,6 +716,9 @@ impl ColumnOrder {
     }
 }
 
+// ----------------------------------------------------------------------
+// Display handlers
+
 impl fmt::Display for Type {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         write!(f, "{self:?}")
@@ -709,73 +768,73 @@ impl fmt::Display for ColumnOrder {
 }
 
 // ----------------------------------------------------------------------
-// parquet::Type <=> Type conversion
+// crate::format::Type <=> Type conversion
 
-impl TryFrom<parquet::Type> for Type {
+impl TryFrom<crate::format::Type> for Type {
     type Error = ParquetError;
 
-    fn try_from(value: parquet::Type) -> Result<Self> {
+    fn try_from(value: crate::format::Type) -> Result<Self> {
         Ok(match value {
-            parquet::Type::BOOLEAN => Type::BOOLEAN,
-            parquet::Type::INT32 => Type::INT32,
-            parquet::Type::INT64 => Type::INT64,
-            parquet::Type::INT96 => Type::INT96,
-            parquet::Type::FLOAT => Type::FLOAT,
-            parquet::Type::DOUBLE => Type::DOUBLE,
-            parquet::Type::BYTE_ARRAY => Type::BYTE_ARRAY,
-            parquet::Type::FIXED_LEN_BYTE_ARRAY => Type::FIXED_LEN_BYTE_ARRAY,
+            crate::format::Type::BOOLEAN => Type::BOOLEAN,
+            crate::format::Type::INT32 => Type::INT32,
+            crate::format::Type::INT64 => Type::INT64,
+            crate::format::Type::INT96 => Type::INT96,
+            crate::format::Type::FLOAT => Type::FLOAT,
+            crate::format::Type::DOUBLE => Type::DOUBLE,
+            crate::format::Type::BYTE_ARRAY => Type::BYTE_ARRAY,
+            crate::format::Type::FIXED_LEN_BYTE_ARRAY => Type::FIXED_LEN_BYTE_ARRAY,
             _ => return Err(general_err!("unexpected parquet type: {}", value.0)),
         })
     }
 }
 
-impl From<Type> for parquet::Type {
+impl From<Type> for crate::format::Type {
     fn from(value: Type) -> Self {
         match value {
-            Type::BOOLEAN => parquet::Type::BOOLEAN,
-            Type::INT32 => parquet::Type::INT32,
-            Type::INT64 => parquet::Type::INT64,
-            Type::INT96 => parquet::Type::INT96,
-            Type::FLOAT => parquet::Type::FLOAT,
-            Type::DOUBLE => parquet::Type::DOUBLE,
-            Type::BYTE_ARRAY => parquet::Type::BYTE_ARRAY,
-            Type::FIXED_LEN_BYTE_ARRAY => parquet::Type::FIXED_LEN_BYTE_ARRAY,
+            Type::BOOLEAN => crate::format::Type::BOOLEAN,
+            Type::INT32 => crate::format::Type::INT32,
+            Type::INT64 => crate::format::Type::INT64,
+            Type::INT96 => crate::format::Type::INT96,
+            Type::FLOAT => crate::format::Type::FLOAT,
+            Type::DOUBLE => crate::format::Type::DOUBLE,
+            Type::BYTE_ARRAY => crate::format::Type::BYTE_ARRAY,
+            Type::FIXED_LEN_BYTE_ARRAY => crate::format::Type::FIXED_LEN_BYTE_ARRAY,
         }
     }
 }
 
 // ----------------------------------------------------------------------
-// parquet::ConvertedType <=> ConvertedType conversion
+// crate::format::ConvertedType <=> ConvertedType conversion
 
-impl TryFrom<Option<parquet::ConvertedType>> for ConvertedType {
+impl TryFrom<Option<crate::format::ConvertedType>> for ConvertedType {
     type Error = ParquetError;
 
-    fn try_from(option: Option<parquet::ConvertedType>) -> Result<Self> {
+    fn try_from(option: Option<crate::format::ConvertedType>) -> Result<Self> {
         Ok(match option {
             None => ConvertedType::NONE,
             Some(value) => match value {
-                parquet::ConvertedType::UTF8 => ConvertedType::UTF8,
-                parquet::ConvertedType::MAP => ConvertedType::MAP,
-                parquet::ConvertedType::MAP_KEY_VALUE => ConvertedType::MAP_KEY_VALUE,
-                parquet::ConvertedType::LIST => ConvertedType::LIST,
-                parquet::ConvertedType::ENUM => ConvertedType::ENUM,
-                parquet::ConvertedType::DECIMAL => ConvertedType::DECIMAL,
-                parquet::ConvertedType::DATE => ConvertedType::DATE,
-                parquet::ConvertedType::TIME_MILLIS => ConvertedType::TIME_MILLIS,
-                parquet::ConvertedType::TIME_MICROS => ConvertedType::TIME_MICROS,
-                parquet::ConvertedType::TIMESTAMP_MILLIS => ConvertedType::TIMESTAMP_MILLIS,
-                parquet::ConvertedType::TIMESTAMP_MICROS => ConvertedType::TIMESTAMP_MICROS,
-
parquet::ConvertedType::UINT_8 => ConvertedType::UINT_8, - parquet::ConvertedType::UINT_16 => ConvertedType::UINT_16, - parquet::ConvertedType::UINT_32 => ConvertedType::UINT_32, - parquet::ConvertedType::UINT_64 => ConvertedType::UINT_64, - parquet::ConvertedType::INT_8 => ConvertedType::INT_8, - parquet::ConvertedType::INT_16 => ConvertedType::INT_16, - parquet::ConvertedType::INT_32 => ConvertedType::INT_32, - parquet::ConvertedType::INT_64 => ConvertedType::INT_64, - parquet::ConvertedType::JSON => ConvertedType::JSON, - parquet::ConvertedType::BSON => ConvertedType::BSON, - parquet::ConvertedType::INTERVAL => ConvertedType::INTERVAL, + crate::format::ConvertedType::UTF8 => ConvertedType::UTF8, + crate::format::ConvertedType::MAP => ConvertedType::MAP, + crate::format::ConvertedType::MAP_KEY_VALUE => ConvertedType::MAP_KEY_VALUE, + crate::format::ConvertedType::LIST => ConvertedType::LIST, + crate::format::ConvertedType::ENUM => ConvertedType::ENUM, + crate::format::ConvertedType::DECIMAL => ConvertedType::DECIMAL, + crate::format::ConvertedType::DATE => ConvertedType::DATE, + crate::format::ConvertedType::TIME_MILLIS => ConvertedType::TIME_MILLIS, + crate::format::ConvertedType::TIME_MICROS => ConvertedType::TIME_MICROS, + crate::format::ConvertedType::TIMESTAMP_MILLIS => ConvertedType::TIMESTAMP_MILLIS, + crate::format::ConvertedType::TIMESTAMP_MICROS => ConvertedType::TIMESTAMP_MICROS, + crate::format::ConvertedType::UINT_8 => ConvertedType::UINT_8, + crate::format::ConvertedType::UINT_16 => ConvertedType::UINT_16, + crate::format::ConvertedType::UINT_32 => ConvertedType::UINT_32, + crate::format::ConvertedType::UINT_64 => ConvertedType::UINT_64, + crate::format::ConvertedType::INT_8 => ConvertedType::INT_8, + crate::format::ConvertedType::INT_16 => ConvertedType::INT_16, + crate::format::ConvertedType::INT_32 => ConvertedType::INT_32, + crate::format::ConvertedType::INT_64 => ConvertedType::INT_64, + crate::format::ConvertedType::JSON => ConvertedType::JSON, + crate::format::ConvertedType::BSON => ConvertedType::BSON, + crate::format::ConvertedType::INTERVAL => ConvertedType::INTERVAL, _ => { return Err(general_err!( "unexpected parquet converted type: {}", @@ -787,115 +846,201 @@ impl TryFrom> for ConvertedType { } } -impl From for Option { +impl From for Option { fn from(value: ConvertedType) -> Self { match value { ConvertedType::NONE => None, - ConvertedType::UTF8 => Some(parquet::ConvertedType::UTF8), - ConvertedType::MAP => Some(parquet::ConvertedType::MAP), - ConvertedType::MAP_KEY_VALUE => Some(parquet::ConvertedType::MAP_KEY_VALUE), - ConvertedType::LIST => Some(parquet::ConvertedType::LIST), - ConvertedType::ENUM => Some(parquet::ConvertedType::ENUM), - ConvertedType::DECIMAL => Some(parquet::ConvertedType::DECIMAL), - ConvertedType::DATE => Some(parquet::ConvertedType::DATE), - ConvertedType::TIME_MILLIS => Some(parquet::ConvertedType::TIME_MILLIS), - ConvertedType::TIME_MICROS => Some(parquet::ConvertedType::TIME_MICROS), - ConvertedType::TIMESTAMP_MILLIS => Some(parquet::ConvertedType::TIMESTAMP_MILLIS), - ConvertedType::TIMESTAMP_MICROS => Some(parquet::ConvertedType::TIMESTAMP_MICROS), - ConvertedType::UINT_8 => Some(parquet::ConvertedType::UINT_8), - ConvertedType::UINT_16 => Some(parquet::ConvertedType::UINT_16), - ConvertedType::UINT_32 => Some(parquet::ConvertedType::UINT_32), - ConvertedType::UINT_64 => Some(parquet::ConvertedType::UINT_64), - ConvertedType::INT_8 => Some(parquet::ConvertedType::INT_8), - ConvertedType::INT_16 => 
Some(parquet::ConvertedType::INT_16), - ConvertedType::INT_32 => Some(parquet::ConvertedType::INT_32), - ConvertedType::INT_64 => Some(parquet::ConvertedType::INT_64), - ConvertedType::JSON => Some(parquet::ConvertedType::JSON), - ConvertedType::BSON => Some(parquet::ConvertedType::BSON), - ConvertedType::INTERVAL => Some(parquet::ConvertedType::INTERVAL), + ConvertedType::UTF8 => Some(crate::format::ConvertedType::UTF8), + ConvertedType::MAP => Some(crate::format::ConvertedType::MAP), + ConvertedType::MAP_KEY_VALUE => Some(crate::format::ConvertedType::MAP_KEY_VALUE), + ConvertedType::LIST => Some(crate::format::ConvertedType::LIST), + ConvertedType::ENUM => Some(crate::format::ConvertedType::ENUM), + ConvertedType::DECIMAL => Some(crate::format::ConvertedType::DECIMAL), + ConvertedType::DATE => Some(crate::format::ConvertedType::DATE), + ConvertedType::TIME_MILLIS => Some(crate::format::ConvertedType::TIME_MILLIS), + ConvertedType::TIME_MICROS => Some(crate::format::ConvertedType::TIME_MICROS), + ConvertedType::TIMESTAMP_MILLIS => Some(crate::format::ConvertedType::TIMESTAMP_MILLIS), + ConvertedType::TIMESTAMP_MICROS => Some(crate::format::ConvertedType::TIMESTAMP_MICROS), + ConvertedType::UINT_8 => Some(crate::format::ConvertedType::UINT_8), + ConvertedType::UINT_16 => Some(crate::format::ConvertedType::UINT_16), + ConvertedType::UINT_32 => Some(crate::format::ConvertedType::UINT_32), + ConvertedType::UINT_64 => Some(crate::format::ConvertedType::UINT_64), + ConvertedType::INT_8 => Some(crate::format::ConvertedType::INT_8), + ConvertedType::INT_16 => Some(crate::format::ConvertedType::INT_16), + ConvertedType::INT_32 => Some(crate::format::ConvertedType::INT_32), + ConvertedType::INT_64 => Some(crate::format::ConvertedType::INT_64), + ConvertedType::JSON => Some(crate::format::ConvertedType::JSON), + ConvertedType::BSON => Some(crate::format::ConvertedType::BSON), + ConvertedType::INTERVAL => Some(crate::format::ConvertedType::INTERVAL), + } + } +} + +// ---------------------------------------------------------------------- +// crate::format::BloomFilterHash <=> BloomFilterHash conversion + +impl From for BloomFilterHash { + fn from(value: crate::format::BloomFilterHash) -> Self { + match value { + crate::format::BloomFilterHash::XXHASH(_) => BloomFilterHash::XXHASH, + } + } +} + +impl From for crate::format::BloomFilterHash { + fn from(value: BloomFilterHash) -> Self { + match value { + BloomFilterHash::XXHASH => crate::format::BloomFilterHash::XXHASH(Default::default()), + } + } +} + +// ---------------------------------------------------------------------- +// crate::format::BloomFilterAlgorithm <=> BloomFilterAlgorithm conversion + +impl From for BloomFilterAlgorithm { + fn from(value: crate::format::BloomFilterAlgorithm) -> Self { + match value { + crate::format::BloomFilterAlgorithm::BLOCK(_) => BloomFilterAlgorithm::BLOCK, + } + } +} + +impl From for crate::format::BloomFilterAlgorithm { + fn from(value: BloomFilterAlgorithm) -> Self { + match value { + BloomFilterAlgorithm::BLOCK => { + crate::format::BloomFilterAlgorithm::BLOCK(Default::default()) + } + } + } +} + +// ---------------------------------------------------------------------- +// crate::format::BloomFilterCompression <=> BloomFilterCompression conversion + +impl From for BloomFilterCompression { + fn from(value: crate::format::BloomFilterCompression) -> Self { + match value { + crate::format::BloomFilterCompression::UNCOMPRESSED(_) => { + BloomFilterCompression::UNCOMPRESSED + } + } + } +} + +impl From for 
crate::format::BloomFilterCompression {
+    fn from(value: BloomFilterCompression) -> Self {
+        match value {
+            BloomFilterCompression::UNCOMPRESSED => {
+                crate::format::BloomFilterCompression::UNCOMPRESSED(Default::default())
+            }
+        }
+    }
+}
 
 // ----------------------------------------------------------------------
-// parquet::LogicalType <=> LogicalType conversion
+// crate::format::TimeUnit <=> TimeUnit conversion
 
-impl From<parquet::LogicalType> for LogicalType {
-    fn from(value: parquet::LogicalType) -> Self {
+impl From<crate::format::TimeUnit> for TimeUnit {
+    fn from(value: crate::format::TimeUnit) -> Self {
         match value {
-            parquet::LogicalType::STRING(_) => LogicalType::String,
-            parquet::LogicalType::MAP(_) => LogicalType::Map,
-            parquet::LogicalType::LIST(_) => LogicalType::List,
-            parquet::LogicalType::ENUM(_) => LogicalType::Enum,
-            parquet::LogicalType::DECIMAL(t) => LogicalType::Decimal {
+            crate::format::TimeUnit::MILLIS(_) => TimeUnit::MILLIS,
+            crate::format::TimeUnit::MICROS(_) => TimeUnit::MICROS,
+            crate::format::TimeUnit::NANOS(_) => TimeUnit::NANOS,
+        }
+    }
+}
+
+impl From<TimeUnit> for crate::format::TimeUnit {
+    fn from(value: TimeUnit) -> Self {
+        match value {
+            TimeUnit::MILLIS => crate::format::TimeUnit::MILLIS(crate::format::MilliSeconds {}),
+            TimeUnit::MICROS => crate::format::TimeUnit::MICROS(crate::format::MicroSeconds {}),
+            TimeUnit::NANOS => crate::format::TimeUnit::NANOS(crate::format::NanoSeconds {}),
+        }
+    }
+}
+
+// ----------------------------------------------------------------------
+// crate::format::LogicalType <=> LogicalType conversion
+
+impl From<crate::format::LogicalType> for LogicalType {
+    fn from(value: crate::format::LogicalType) -> Self {
+        match value {
+            crate::format::LogicalType::STRING(_) => LogicalType::String,
+            crate::format::LogicalType::MAP(_) => LogicalType::Map,
+            crate::format::LogicalType::LIST(_) => LogicalType::List,
+            crate::format::LogicalType::ENUM(_) => LogicalType::Enum,
+            crate::format::LogicalType::DECIMAL(t) => LogicalType::Decimal {
                 scale: t.scale,
                 precision: t.precision,
             },
-            parquet::LogicalType::DATE(_) => LogicalType::Date,
-            parquet::LogicalType::TIME(t) => LogicalType::Time {
+            crate::format::LogicalType::DATE(_) => LogicalType::Date,
+            crate::format::LogicalType::TIME(t) => LogicalType::Time {
                 is_adjusted_to_u_t_c: t.is_adjusted_to_u_t_c,
-                unit: t.unit,
+                unit: t.unit.into(),
             },
-            parquet::LogicalType::TIMESTAMP(t) => LogicalType::Timestamp {
+            crate::format::LogicalType::TIMESTAMP(t) => LogicalType::Timestamp {
                 is_adjusted_to_u_t_c: t.is_adjusted_to_u_t_c,
-                unit: t.unit,
+                unit: t.unit.into(),
             },
-            parquet::LogicalType::INTEGER(t) => LogicalType::Integer {
+            crate::format::LogicalType::INTEGER(t) => LogicalType::Integer {
                 bit_width: t.bit_width,
                 is_signed: t.is_signed,
             },
-            parquet::LogicalType::UNKNOWN(_) => LogicalType::Unknown,
-            parquet::LogicalType::JSON(_) => LogicalType::Json,
-            parquet::LogicalType::BSON(_) => LogicalType::Bson,
-            parquet::LogicalType::UUID(_) => LogicalType::Uuid,
-            parquet::LogicalType::FLOAT16(_) => LogicalType::Float16,
-            parquet::LogicalType::VARIANT(_) => LogicalType::Variant,
-            parquet::LogicalType::GEOMETRY(_) => LogicalType::Geometry,
-            parquet::LogicalType::GEOGRAPHY(_) => LogicalType::Geography,
+            crate::format::LogicalType::UNKNOWN(_) => LogicalType::Unknown,
+            crate::format::LogicalType::JSON(_) => LogicalType::Json,
+            crate::format::LogicalType::BSON(_) => LogicalType::Bson,
+            crate::format::LogicalType::UUID(_) => LogicalType::Uuid,
+            crate::format::LogicalType::FLOAT16(_) => LogicalType::Float16,
+            crate::format::LogicalType::VARIANT(_) =>
LogicalType::Variant, + crate::format::LogicalType::GEOMETRY(_) => LogicalType::Geometry, + crate::format::LogicalType::GEOGRAPHY(_) => LogicalType::Geography, } } } -impl From for parquet::LogicalType { +impl From for crate::format::LogicalType { fn from(value: LogicalType) -> Self { match value { - LogicalType::String => parquet::LogicalType::STRING(Default::default()), - LogicalType::Map => parquet::LogicalType::MAP(Default::default()), - LogicalType::List => parquet::LogicalType::LIST(Default::default()), - LogicalType::Enum => parquet::LogicalType::ENUM(Default::default()), + LogicalType::String => crate::format::LogicalType::STRING(Default::default()), + LogicalType::Map => crate::format::LogicalType::MAP(Default::default()), + LogicalType::List => crate::format::LogicalType::LIST(Default::default()), + LogicalType::Enum => crate::format::LogicalType::ENUM(Default::default()), LogicalType::Decimal { scale, precision } => { - parquet::LogicalType::DECIMAL(DecimalType { scale, precision }) + crate::format::LogicalType::DECIMAL(crate::format::DecimalType { scale, precision }) } - LogicalType::Date => parquet::LogicalType::DATE(Default::default()), + LogicalType::Date => crate::format::LogicalType::DATE(Default::default()), LogicalType::Time { is_adjusted_to_u_t_c, unit, - } => parquet::LogicalType::TIME(TimeType { + } => crate::format::LogicalType::TIME(crate::format::TimeType { is_adjusted_to_u_t_c, - unit, + unit: unit.into(), }), LogicalType::Timestamp { is_adjusted_to_u_t_c, unit, - } => parquet::LogicalType::TIMESTAMP(TimestampType { + } => crate::format::LogicalType::TIMESTAMP(crate::format::TimestampType { is_adjusted_to_u_t_c, - unit, + unit: unit.into(), }), LogicalType::Integer { bit_width, is_signed, - } => parquet::LogicalType::INTEGER(IntType { + } => crate::format::LogicalType::INTEGER(crate::format::IntType { bit_width, is_signed, }), - LogicalType::Unknown => parquet::LogicalType::UNKNOWN(Default::default()), - LogicalType::Json => parquet::LogicalType::JSON(Default::default()), - LogicalType::Bson => parquet::LogicalType::BSON(Default::default()), - LogicalType::Uuid => parquet::LogicalType::UUID(Default::default()), - LogicalType::Float16 => parquet::LogicalType::FLOAT16(Default::default()), - LogicalType::Variant => parquet::LogicalType::VARIANT(Default::default()), - LogicalType::Geometry => parquet::LogicalType::GEOMETRY(Default::default()), - LogicalType::Geography => parquet::LogicalType::GEOGRAPHY(Default::default()), + LogicalType::Unknown => crate::format::LogicalType::UNKNOWN(Default::default()), + LogicalType::Json => crate::format::LogicalType::JSON(Default::default()), + LogicalType::Bson => crate::format::LogicalType::BSON(Default::default()), + LogicalType::Uuid => crate::format::LogicalType::UUID(Default::default()), + LogicalType::Float16 => crate::format::LogicalType::FLOAT16(Default::default()), + LogicalType::Variant => crate::format::LogicalType::VARIANT(Default::default()), + LogicalType::Geometry => crate::format::LogicalType::GEOMETRY(Default::default()), + LogicalType::Geography => crate::format::LogicalType::GEOGRAPHY(Default::default()), } } } @@ -920,14 +1065,14 @@ impl From> for ConvertedType { LogicalType::Decimal { .. } => ConvertedType::DECIMAL, LogicalType::Date => ConvertedType::DATE, LogicalType::Time { unit, .. 
} => match unit { - TimeUnit::MILLIS(_) => ConvertedType::TIME_MILLIS, - TimeUnit::MICROS(_) => ConvertedType::TIME_MICROS, - TimeUnit::NANOS(_) => ConvertedType::NONE, + TimeUnit::MILLIS => ConvertedType::TIME_MILLIS, + TimeUnit::MICROS => ConvertedType::TIME_MICROS, + TimeUnit::NANOS => ConvertedType::NONE, }, LogicalType::Timestamp { unit, .. } => match unit { - TimeUnit::MILLIS(_) => ConvertedType::TIMESTAMP_MILLIS, - TimeUnit::MICROS(_) => ConvertedType::TIMESTAMP_MICROS, - TimeUnit::NANOS(_) => ConvertedType::NONE, + TimeUnit::MILLIS => ConvertedType::TIMESTAMP_MILLIS, + TimeUnit::MICROS => ConvertedType::TIMESTAMP_MICROS, + TimeUnit::NANOS => ConvertedType::NONE, }, LogicalType::Integer { bit_width, @@ -958,16 +1103,16 @@ impl From> for ConvertedType { } // ---------------------------------------------------------------------- -// parquet::FieldRepetitionType <=> Repetition conversion +// crate::format::FieldRepetitionType <=> Repetition conversion -impl TryFrom for Repetition { +impl TryFrom for Repetition { type Error = ParquetError; - fn try_from(value: parquet::FieldRepetitionType) -> Result { + fn try_from(value: crate::format::FieldRepetitionType) -> Result { Ok(match value { - parquet::FieldRepetitionType::REQUIRED => Repetition::REQUIRED, - parquet::FieldRepetitionType::OPTIONAL => Repetition::OPTIONAL, - parquet::FieldRepetitionType::REPEATED => Repetition::REPEATED, + crate::format::FieldRepetitionType::REQUIRED => Repetition::REQUIRED, + crate::format::FieldRepetitionType::OPTIONAL => Repetition::OPTIONAL, + crate::format::FieldRepetitionType::REPEATED => Repetition::REPEATED, _ => { return Err(general_err!( "unexpected parquet repetition type: {}", @@ -978,72 +1123,72 @@ impl TryFrom for Repetition { } } -impl From for parquet::FieldRepetitionType { +impl From for crate::format::FieldRepetitionType { fn from(value: Repetition) -> Self { match value { - Repetition::REQUIRED => parquet::FieldRepetitionType::REQUIRED, - Repetition::OPTIONAL => parquet::FieldRepetitionType::OPTIONAL, - Repetition::REPEATED => parquet::FieldRepetitionType::REPEATED, + Repetition::REQUIRED => crate::format::FieldRepetitionType::REQUIRED, + Repetition::OPTIONAL => crate::format::FieldRepetitionType::OPTIONAL, + Repetition::REPEATED => crate::format::FieldRepetitionType::REPEATED, } } } // ---------------------------------------------------------------------- -// parquet::Encoding <=> Encoding conversion +// crate::format::Encoding <=> Encoding conversion -impl TryFrom for Encoding { +impl TryFrom for Encoding { type Error = ParquetError; - fn try_from(value: parquet::Encoding) -> Result { + fn try_from(value: crate::format::Encoding) -> Result { Ok(match value { - parquet::Encoding::PLAIN => Encoding::PLAIN, - parquet::Encoding::PLAIN_DICTIONARY => Encoding::PLAIN_DICTIONARY, - parquet::Encoding::RLE => Encoding::RLE, + crate::format::Encoding::PLAIN => Encoding::PLAIN, + crate::format::Encoding::PLAIN_DICTIONARY => Encoding::PLAIN_DICTIONARY, + crate::format::Encoding::RLE => Encoding::RLE, #[allow(deprecated)] - parquet::Encoding::BIT_PACKED => Encoding::BIT_PACKED, - parquet::Encoding::DELTA_BINARY_PACKED => Encoding::DELTA_BINARY_PACKED, - parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY => Encoding::DELTA_LENGTH_BYTE_ARRAY, - parquet::Encoding::DELTA_BYTE_ARRAY => Encoding::DELTA_BYTE_ARRAY, - parquet::Encoding::RLE_DICTIONARY => Encoding::RLE_DICTIONARY, - parquet::Encoding::BYTE_STREAM_SPLIT => Encoding::BYTE_STREAM_SPLIT, + crate::format::Encoding::BIT_PACKED => Encoding::BIT_PACKED, + 
crate::format::Encoding::DELTA_BINARY_PACKED => Encoding::DELTA_BINARY_PACKED, + crate::format::Encoding::DELTA_LENGTH_BYTE_ARRAY => Encoding::DELTA_LENGTH_BYTE_ARRAY, + crate::format::Encoding::DELTA_BYTE_ARRAY => Encoding::DELTA_BYTE_ARRAY, + crate::format::Encoding::RLE_DICTIONARY => Encoding::RLE_DICTIONARY, + crate::format::Encoding::BYTE_STREAM_SPLIT => Encoding::BYTE_STREAM_SPLIT, _ => return Err(general_err!("unexpected parquet encoding: {}", value.0)), }) } } -impl From for parquet::Encoding { +impl From for crate::format::Encoding { fn from(value: Encoding) -> Self { match value { - Encoding::PLAIN => parquet::Encoding::PLAIN, - Encoding::PLAIN_DICTIONARY => parquet::Encoding::PLAIN_DICTIONARY, - Encoding::RLE => parquet::Encoding::RLE, + Encoding::PLAIN => crate::format::Encoding::PLAIN, + Encoding::PLAIN_DICTIONARY => crate::format::Encoding::PLAIN_DICTIONARY, + Encoding::RLE => crate::format::Encoding::RLE, #[allow(deprecated)] - Encoding::BIT_PACKED => parquet::Encoding::BIT_PACKED, - Encoding::DELTA_BINARY_PACKED => parquet::Encoding::DELTA_BINARY_PACKED, - Encoding::DELTA_LENGTH_BYTE_ARRAY => parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY, - Encoding::DELTA_BYTE_ARRAY => parquet::Encoding::DELTA_BYTE_ARRAY, - Encoding::RLE_DICTIONARY => parquet::Encoding::RLE_DICTIONARY, - Encoding::BYTE_STREAM_SPLIT => parquet::Encoding::BYTE_STREAM_SPLIT, + Encoding::BIT_PACKED => crate::format::Encoding::BIT_PACKED, + Encoding::DELTA_BINARY_PACKED => crate::format::Encoding::DELTA_BINARY_PACKED, + Encoding::DELTA_LENGTH_BYTE_ARRAY => crate::format::Encoding::DELTA_LENGTH_BYTE_ARRAY, + Encoding::DELTA_BYTE_ARRAY => crate::format::Encoding::DELTA_BYTE_ARRAY, + Encoding::RLE_DICTIONARY => crate::format::Encoding::RLE_DICTIONARY, + Encoding::BYTE_STREAM_SPLIT => crate::format::Encoding::BYTE_STREAM_SPLIT, } } } // ---------------------------------------------------------------------- -// parquet::CompressionCodec <=> Compression conversion +// crate::format::CompressionCodec <=> Compression conversion -impl TryFrom for Compression { +impl TryFrom for Compression { type Error = ParquetError; - fn try_from(value: parquet::CompressionCodec) -> Result { + fn try_from(value: crate::format::CompressionCodec) -> Result { Ok(match value { - parquet::CompressionCodec::UNCOMPRESSED => Compression::UNCOMPRESSED, - parquet::CompressionCodec::SNAPPY => Compression::SNAPPY, - parquet::CompressionCodec::GZIP => Compression::GZIP(Default::default()), - parquet::CompressionCodec::LZO => Compression::LZO, - parquet::CompressionCodec::BROTLI => Compression::BROTLI(Default::default()), - parquet::CompressionCodec::LZ4 => Compression::LZ4, - parquet::CompressionCodec::ZSTD => Compression::ZSTD(Default::default()), - parquet::CompressionCodec::LZ4_RAW => Compression::LZ4_RAW, + crate::format::CompressionCodec::UNCOMPRESSED => Compression::UNCOMPRESSED, + crate::format::CompressionCodec::SNAPPY => Compression::SNAPPY, + crate::format::CompressionCodec::GZIP => Compression::GZIP(Default::default()), + crate::format::CompressionCodec::LZO => Compression::LZO, + crate::format::CompressionCodec::BROTLI => Compression::BROTLI(Default::default()), + crate::format::CompressionCodec::LZ4 => Compression::LZ4, + crate::format::CompressionCodec::ZSTD => Compression::ZSTD(Default::default()), + crate::format::CompressionCodec::LZ4_RAW => Compression::LZ4_RAW, _ => { return Err(general_err!( "unexpected parquet compression codec: {}", @@ -1054,45 +1199,76 @@ impl TryFrom for Compression { } } -impl From for 
parquet::CompressionCodec {
+impl From<Compression> for crate::format::CompressionCodec {
     fn from(value: Compression) -> Self {
         match value {
-            Compression::UNCOMPRESSED => parquet::CompressionCodec::UNCOMPRESSED,
-            Compression::SNAPPY => parquet::CompressionCodec::SNAPPY,
-            Compression::GZIP(_) => parquet::CompressionCodec::GZIP,
-            Compression::LZO => parquet::CompressionCodec::LZO,
-            Compression::BROTLI(_) => parquet::CompressionCodec::BROTLI,
-            Compression::LZ4 => parquet::CompressionCodec::LZ4,
-            Compression::ZSTD(_) => parquet::CompressionCodec::ZSTD,
-            Compression::LZ4_RAW => parquet::CompressionCodec::LZ4_RAW,
+            Compression::UNCOMPRESSED => crate::format::CompressionCodec::UNCOMPRESSED,
+            Compression::SNAPPY => crate::format::CompressionCodec::SNAPPY,
+            Compression::GZIP(_) => crate::format::CompressionCodec::GZIP,
+            Compression::LZO => crate::format::CompressionCodec::LZO,
+            Compression::BROTLI(_) => crate::format::CompressionCodec::BROTLI,
+            Compression::LZ4 => crate::format::CompressionCodec::LZ4,
+            Compression::ZSTD(_) => crate::format::CompressionCodec::ZSTD,
+            Compression::LZ4_RAW => crate::format::CompressionCodec::LZ4_RAW,
         }
     }
 }
 
 // ----------------------------------------------------------------------
-// parquet::PageType <=> PageType conversion
+// crate::format::PageType <=> PageType conversion
 
-impl TryFrom<parquet::PageType> for PageType {
+impl TryFrom<crate::format::PageType> for PageType {
     type Error = ParquetError;
 
-    fn try_from(value: parquet::PageType) -> Result<Self> {
+    fn try_from(value: crate::format::PageType) -> Result<Self> {
         Ok(match value {
-            parquet::PageType::DATA_PAGE => PageType::DATA_PAGE,
-            parquet::PageType::INDEX_PAGE => PageType::INDEX_PAGE,
-            parquet::PageType::DICTIONARY_PAGE => PageType::DICTIONARY_PAGE,
-            parquet::PageType::DATA_PAGE_V2 => PageType::DATA_PAGE_V2,
+            crate::format::PageType::DATA_PAGE => PageType::DATA_PAGE,
+            crate::format::PageType::INDEX_PAGE => PageType::INDEX_PAGE,
+            crate::format::PageType::DICTIONARY_PAGE => PageType::DICTIONARY_PAGE,
+            crate::format::PageType::DATA_PAGE_V2 => PageType::DATA_PAGE_V2,
             _ => return Err(general_err!("unexpected parquet page type: {}", value.0)),
         })
     }
 }
 
-impl From<PageType> for parquet::PageType {
+impl From<PageType> for crate::format::PageType {
     fn from(value: PageType) -> Self {
         match value {
-            PageType::DATA_PAGE => parquet::PageType::DATA_PAGE,
-            PageType::INDEX_PAGE => parquet::PageType::INDEX_PAGE,
-            PageType::DICTIONARY_PAGE => parquet::PageType::DICTIONARY_PAGE,
-            PageType::DATA_PAGE_V2 => parquet::PageType::DATA_PAGE_V2,
+            PageType::DATA_PAGE => crate::format::PageType::DATA_PAGE,
+            PageType::INDEX_PAGE => crate::format::PageType::INDEX_PAGE,
+            PageType::DICTIONARY_PAGE => crate::format::PageType::DICTIONARY_PAGE,
+            PageType::DATA_PAGE_V2 => crate::format::PageType::DATA_PAGE_V2,
+        }
+    }
+}
+
+// ----------------------------------------------------------------------
+// crate::format::BoundaryOrder <=> BoundaryOrder conversion
+
+impl TryFrom<crate::format::BoundaryOrder> for BoundaryOrder {
+    type Error = ParquetError;
+
+    fn try_from(value: crate::format::BoundaryOrder) -> Result<Self> {
+        Ok(match value {
+            crate::format::BoundaryOrder::UNORDERED => BoundaryOrder::UNORDERED,
+            crate::format::BoundaryOrder::ASCENDING => BoundaryOrder::ASCENDING,
+            crate::format::BoundaryOrder::DESCENDING => BoundaryOrder::DESCENDING,
+            _ => {
+                return Err(general_err!(
+                    "unexpected parquet boundary order type: {}",
+                    value.0
+                ))
+            }
+        })
+    }
+}
+
+impl From<BoundaryOrder> for crate::format::BoundaryOrder {
+    fn from(value: BoundaryOrder) -> Self {
+        match value {
+            BoundaryOrder::UNORDERED => crate::format::BoundaryOrder::UNORDERED,
+
BoundaryOrder::ASCENDING => crate::format::BoundaryOrder::ASCENDING, + BoundaryOrder::DESCENDING => crate::format::BoundaryOrder::DESCENDING, } } } @@ -1184,11 +1360,11 @@ impl str::FromStr for LogicalType { "DATE" => Ok(LogicalType::Date), "TIME" => Ok(LogicalType::Time { is_adjusted_to_u_t_c: false, - unit: TimeUnit::MILLIS(parquet::MilliSeconds {}), + unit: TimeUnit::MILLIS, }), "TIMESTAMP" => Ok(LogicalType::Timestamp { is_adjusted_to_u_t_c: false, - unit: TimeUnit::MILLIS(parquet::MilliSeconds {}), + unit: TimeUnit::MILLIS, }), "STRING" => Ok(LogicalType::String), "JSON" => Ok(LogicalType::Json), @@ -1227,35 +1403,50 @@ mod tests { #[test] fn test_from_type() { assert_eq!( - Type::try_from(parquet::Type::BOOLEAN).unwrap(), + Type::try_from(crate::format::Type::BOOLEAN).unwrap(), Type::BOOLEAN ); - assert_eq!(Type::try_from(parquet::Type::INT32).unwrap(), Type::INT32); - assert_eq!(Type::try_from(parquet::Type::INT64).unwrap(), Type::INT64); - assert_eq!(Type::try_from(parquet::Type::INT96).unwrap(), Type::INT96); - assert_eq!(Type::try_from(parquet::Type::FLOAT).unwrap(), Type::FLOAT); - assert_eq!(Type::try_from(parquet::Type::DOUBLE).unwrap(), Type::DOUBLE); assert_eq!( - Type::try_from(parquet::Type::BYTE_ARRAY).unwrap(), + Type::try_from(crate::format::Type::INT32).unwrap(), + Type::INT32 + ); + assert_eq!( + Type::try_from(crate::format::Type::INT64).unwrap(), + Type::INT64 + ); + assert_eq!( + Type::try_from(crate::format::Type::INT96).unwrap(), + Type::INT96 + ); + assert_eq!( + Type::try_from(crate::format::Type::FLOAT).unwrap(), + Type::FLOAT + ); + assert_eq!( + Type::try_from(crate::format::Type::DOUBLE).unwrap(), + Type::DOUBLE + ); + assert_eq!( + Type::try_from(crate::format::Type::BYTE_ARRAY).unwrap(), Type::BYTE_ARRAY ); assert_eq!( - Type::try_from(parquet::Type::FIXED_LEN_BYTE_ARRAY).unwrap(), + Type::try_from(crate::format::Type::FIXED_LEN_BYTE_ARRAY).unwrap(), Type::FIXED_LEN_BYTE_ARRAY ); } #[test] fn test_into_type() { - assert_eq!(parquet::Type::BOOLEAN, Type::BOOLEAN.into()); - assert_eq!(parquet::Type::INT32, Type::INT32.into()); - assert_eq!(parquet::Type::INT64, Type::INT64.into()); - assert_eq!(parquet::Type::INT96, Type::INT96.into()); - assert_eq!(parquet::Type::FLOAT, Type::FLOAT.into()); - assert_eq!(parquet::Type::DOUBLE, Type::DOUBLE.into()); - assert_eq!(parquet::Type::BYTE_ARRAY, Type::BYTE_ARRAY.into()); - assert_eq!( - parquet::Type::FIXED_LEN_BYTE_ARRAY, + assert_eq!(crate::format::Type::BOOLEAN, Type::BOOLEAN.into()); + assert_eq!(crate::format::Type::INT32, Type::INT32.into()); + assert_eq!(crate::format::Type::INT64, Type::INT64.into()); + assert_eq!(crate::format::Type::INT96, Type::INT96.into()); + assert_eq!(crate::format::Type::FLOAT, Type::FLOAT.into()); + assert_eq!(crate::format::Type::DOUBLE, Type::DOUBLE.into()); + assert_eq!(crate::format::Type::BYTE_ARRAY, Type::BYTE_ARRAY.into()); + assert_eq!( + crate::format::Type::FIXED_LEN_BYTE_ARRAY, Type::FIXED_LEN_BYTE_ARRAY.into() ); } @@ -1337,196 +1528,199 @@ mod tests { #[test] fn test_from_converted_type() { - let parquet_conv_none: Option = None; + let parquet_conv_none: Option = None; assert_eq!( ConvertedType::try_from(parquet_conv_none).unwrap(), ConvertedType::NONE ); assert_eq!( - ConvertedType::try_from(Some(parquet::ConvertedType::UTF8)).unwrap(), + ConvertedType::try_from(Some(crate::format::ConvertedType::UTF8)).unwrap(), ConvertedType::UTF8 ); assert_eq!( - ConvertedType::try_from(Some(parquet::ConvertedType::MAP)).unwrap(), + 
ConvertedType::try_from(Some(crate::format::ConvertedType::MAP)).unwrap(), ConvertedType::MAP ); assert_eq!( - ConvertedType::try_from(Some(parquet::ConvertedType::MAP_KEY_VALUE)).unwrap(), + ConvertedType::try_from(Some(crate::format::ConvertedType::MAP_KEY_VALUE)).unwrap(), ConvertedType::MAP_KEY_VALUE ); assert_eq!( - ConvertedType::try_from(Some(parquet::ConvertedType::LIST)).unwrap(), + ConvertedType::try_from(Some(crate::format::ConvertedType::LIST)).unwrap(), ConvertedType::LIST ); assert_eq!( - ConvertedType::try_from(Some(parquet::ConvertedType::ENUM)).unwrap(), + ConvertedType::try_from(Some(crate::format::ConvertedType::ENUM)).unwrap(), ConvertedType::ENUM ); assert_eq!( - ConvertedType::try_from(Some(parquet::ConvertedType::DECIMAL)).unwrap(), + ConvertedType::try_from(Some(crate::format::ConvertedType::DECIMAL)).unwrap(), ConvertedType::DECIMAL ); assert_eq!( - ConvertedType::try_from(Some(parquet::ConvertedType::DATE)).unwrap(), + ConvertedType::try_from(Some(crate::format::ConvertedType::DATE)).unwrap(), ConvertedType::DATE ); assert_eq!( - ConvertedType::try_from(Some(parquet::ConvertedType::TIME_MILLIS)).unwrap(), + ConvertedType::try_from(Some(crate::format::ConvertedType::TIME_MILLIS)).unwrap(), ConvertedType::TIME_MILLIS ); assert_eq!( - ConvertedType::try_from(Some(parquet::ConvertedType::TIME_MICROS)).unwrap(), + ConvertedType::try_from(Some(crate::format::ConvertedType::TIME_MICROS)).unwrap(), ConvertedType::TIME_MICROS ); assert_eq!( - ConvertedType::try_from(Some(parquet::ConvertedType::TIMESTAMP_MILLIS)).unwrap(), + ConvertedType::try_from(Some(crate::format::ConvertedType::TIMESTAMP_MILLIS)).unwrap(), ConvertedType::TIMESTAMP_MILLIS ); assert_eq!( - ConvertedType::try_from(Some(parquet::ConvertedType::TIMESTAMP_MICROS)).unwrap(), + ConvertedType::try_from(Some(crate::format::ConvertedType::TIMESTAMP_MICROS)).unwrap(), ConvertedType::TIMESTAMP_MICROS ); assert_eq!( - ConvertedType::try_from(Some(parquet::ConvertedType::UINT_8)).unwrap(), + ConvertedType::try_from(Some(crate::format::ConvertedType::UINT_8)).unwrap(), ConvertedType::UINT_8 ); assert_eq!( - ConvertedType::try_from(Some(parquet::ConvertedType::UINT_16)).unwrap(), + ConvertedType::try_from(Some(crate::format::ConvertedType::UINT_16)).unwrap(), ConvertedType::UINT_16 ); assert_eq!( - ConvertedType::try_from(Some(parquet::ConvertedType::UINT_32)).unwrap(), + ConvertedType::try_from(Some(crate::format::ConvertedType::UINT_32)).unwrap(), ConvertedType::UINT_32 ); assert_eq!( - ConvertedType::try_from(Some(parquet::ConvertedType::UINT_64)).unwrap(), + ConvertedType::try_from(Some(crate::format::ConvertedType::UINT_64)).unwrap(), ConvertedType::UINT_64 ); assert_eq!( - ConvertedType::try_from(Some(parquet::ConvertedType::INT_8)).unwrap(), + ConvertedType::try_from(Some(crate::format::ConvertedType::INT_8)).unwrap(), ConvertedType::INT_8 ); assert_eq!( - ConvertedType::try_from(Some(parquet::ConvertedType::INT_16)).unwrap(), + ConvertedType::try_from(Some(crate::format::ConvertedType::INT_16)).unwrap(), ConvertedType::INT_16 ); assert_eq!( - ConvertedType::try_from(Some(parquet::ConvertedType::INT_32)).unwrap(), + ConvertedType::try_from(Some(crate::format::ConvertedType::INT_32)).unwrap(), ConvertedType::INT_32 ); assert_eq!( - ConvertedType::try_from(Some(parquet::ConvertedType::INT_64)).unwrap(), + ConvertedType::try_from(Some(crate::format::ConvertedType::INT_64)).unwrap(), ConvertedType::INT_64 ); assert_eq!( - ConvertedType::try_from(Some(parquet::ConvertedType::JSON)).unwrap(), + 
ConvertedType::try_from(Some(crate::format::ConvertedType::JSON)).unwrap(), ConvertedType::JSON ); assert_eq!( - ConvertedType::try_from(Some(parquet::ConvertedType::BSON)).unwrap(), + ConvertedType::try_from(Some(crate::format::ConvertedType::BSON)).unwrap(), ConvertedType::BSON ); assert_eq!( - ConvertedType::try_from(Some(parquet::ConvertedType::INTERVAL)).unwrap(), + ConvertedType::try_from(Some(crate::format::ConvertedType::INTERVAL)).unwrap(), ConvertedType::INTERVAL ); assert_eq!( - ConvertedType::try_from(Some(parquet::ConvertedType::DECIMAL)).unwrap(), + ConvertedType::try_from(Some(crate::format::ConvertedType::DECIMAL)).unwrap(), ConvertedType::DECIMAL ) } #[test] fn test_into_converted_type() { - let converted_type: Option = None; + let converted_type: Option = None; assert_eq!(converted_type, ConvertedType::NONE.into()); assert_eq!( - Some(parquet::ConvertedType::UTF8), + Some(crate::format::ConvertedType::UTF8), ConvertedType::UTF8.into() ); - assert_eq!(Some(parquet::ConvertedType::MAP), ConvertedType::MAP.into()); assert_eq!( - Some(parquet::ConvertedType::MAP_KEY_VALUE), + Some(crate::format::ConvertedType::MAP), + ConvertedType::MAP.into() + ); + assert_eq!( + Some(crate::format::ConvertedType::MAP_KEY_VALUE), ConvertedType::MAP_KEY_VALUE.into() ); assert_eq!( - Some(parquet::ConvertedType::LIST), + Some(crate::format::ConvertedType::LIST), ConvertedType::LIST.into() ); assert_eq!( - Some(parquet::ConvertedType::ENUM), + Some(crate::format::ConvertedType::ENUM), ConvertedType::ENUM.into() ); assert_eq!( - Some(parquet::ConvertedType::DECIMAL), + Some(crate::format::ConvertedType::DECIMAL), ConvertedType::DECIMAL.into() ); assert_eq!( - Some(parquet::ConvertedType::DATE), + Some(crate::format::ConvertedType::DATE), ConvertedType::DATE.into() ); assert_eq!( - Some(parquet::ConvertedType::TIME_MILLIS), + Some(crate::format::ConvertedType::TIME_MILLIS), ConvertedType::TIME_MILLIS.into() ); assert_eq!( - Some(parquet::ConvertedType::TIME_MICROS), + Some(crate::format::ConvertedType::TIME_MICROS), ConvertedType::TIME_MICROS.into() ); assert_eq!( - Some(parquet::ConvertedType::TIMESTAMP_MILLIS), + Some(crate::format::ConvertedType::TIMESTAMP_MILLIS), ConvertedType::TIMESTAMP_MILLIS.into() ); assert_eq!( - Some(parquet::ConvertedType::TIMESTAMP_MICROS), + Some(crate::format::ConvertedType::TIMESTAMP_MICROS), ConvertedType::TIMESTAMP_MICROS.into() ); assert_eq!( - Some(parquet::ConvertedType::UINT_8), + Some(crate::format::ConvertedType::UINT_8), ConvertedType::UINT_8.into() ); assert_eq!( - Some(parquet::ConvertedType::UINT_16), + Some(crate::format::ConvertedType::UINT_16), ConvertedType::UINT_16.into() ); assert_eq!( - Some(parquet::ConvertedType::UINT_32), + Some(crate::format::ConvertedType::UINT_32), ConvertedType::UINT_32.into() ); assert_eq!( - Some(parquet::ConvertedType::UINT_64), + Some(crate::format::ConvertedType::UINT_64), ConvertedType::UINT_64.into() ); assert_eq!( - Some(parquet::ConvertedType::INT_8), + Some(crate::format::ConvertedType::INT_8), ConvertedType::INT_8.into() ); assert_eq!( - Some(parquet::ConvertedType::INT_16), + Some(crate::format::ConvertedType::INT_16), ConvertedType::INT_16.into() ); assert_eq!( - Some(parquet::ConvertedType::INT_32), + Some(crate::format::ConvertedType::INT_32), ConvertedType::INT_32.into() ); assert_eq!( - Some(parquet::ConvertedType::INT_64), + Some(crate::format::ConvertedType::INT_64), ConvertedType::INT_64.into() ); assert_eq!( - Some(parquet::ConvertedType::JSON), + Some(crate::format::ConvertedType::JSON), 
ConvertedType::JSON.into() ); assert_eq!( - Some(parquet::ConvertedType::BSON), + Some(crate::format::ConvertedType::BSON), ConvertedType::BSON.into() ); assert_eq!( - Some(parquet::ConvertedType::INTERVAL), + Some(crate::format::ConvertedType::INTERVAL), ConvertedType::INTERVAL.into() ); assert_eq!( - Some(parquet::ConvertedType::DECIMAL), + Some(crate::format::ConvertedType::DECIMAL), ConvertedType::DECIMAL.into() ) } @@ -1732,42 +1926,42 @@ mod tests { ); assert_eq!( ConvertedType::from(Some(LogicalType::Time { - unit: TimeUnit::MILLIS(Default::default()), + unit: TimeUnit::MILLIS, is_adjusted_to_u_t_c: true, })), ConvertedType::TIME_MILLIS ); assert_eq!( ConvertedType::from(Some(LogicalType::Time { - unit: TimeUnit::MICROS(Default::default()), + unit: TimeUnit::MICROS, is_adjusted_to_u_t_c: true, })), ConvertedType::TIME_MICROS ); assert_eq!( ConvertedType::from(Some(LogicalType::Time { - unit: TimeUnit::NANOS(Default::default()), + unit: TimeUnit::NANOS, is_adjusted_to_u_t_c: false, })), ConvertedType::NONE ); assert_eq!( ConvertedType::from(Some(LogicalType::Timestamp { - unit: TimeUnit::MILLIS(Default::default()), + unit: TimeUnit::MILLIS, is_adjusted_to_u_t_c: true, })), ConvertedType::TIMESTAMP_MILLIS ); assert_eq!( ConvertedType::from(Some(LogicalType::Timestamp { - unit: TimeUnit::MICROS(Default::default()), + unit: TimeUnit::MICROS, is_adjusted_to_u_t_c: false, })), ConvertedType::TIMESTAMP_MICROS ); assert_eq!( ConvertedType::from(Some(LogicalType::Timestamp { - unit: TimeUnit::NANOS(Default::default()), + unit: TimeUnit::NANOS, is_adjusted_to_u_t_c: false, })), ConvertedType::NONE @@ -1864,15 +2058,15 @@ mod tests { #[test] fn test_from_repetition() { assert_eq!( - Repetition::try_from(parquet::FieldRepetitionType::REQUIRED).unwrap(), + Repetition::try_from(crate::format::FieldRepetitionType::REQUIRED).unwrap(), Repetition::REQUIRED ); assert_eq!( - Repetition::try_from(parquet::FieldRepetitionType::OPTIONAL).unwrap(), + Repetition::try_from(crate::format::FieldRepetitionType::OPTIONAL).unwrap(), Repetition::OPTIONAL ); assert_eq!( - Repetition::try_from(parquet::FieldRepetitionType::REPEATED).unwrap(), + Repetition::try_from(crate::format::FieldRepetitionType::REPEATED).unwrap(), Repetition::REPEATED ); } @@ -1880,15 +2074,15 @@ mod tests { #[test] fn test_into_repetition() { assert_eq!( - parquet::FieldRepetitionType::REQUIRED, + crate::format::FieldRepetitionType::REQUIRED, Repetition::REQUIRED.into() ); assert_eq!( - parquet::FieldRepetitionType::OPTIONAL, + crate::format::FieldRepetitionType::OPTIONAL, Repetition::OPTIONAL.into() ); assert_eq!( - parquet::FieldRepetitionType::REPEATED, + crate::format::FieldRepetitionType::REPEATED, Repetition::REPEATED.into() ); } @@ -1939,54 +2133,57 @@ mod tests { #[test] fn test_from_encoding() { assert_eq!( - Encoding::try_from(parquet::Encoding::PLAIN).unwrap(), + Encoding::try_from(crate::format::Encoding::PLAIN).unwrap(), Encoding::PLAIN ); assert_eq!( - Encoding::try_from(parquet::Encoding::PLAIN_DICTIONARY).unwrap(), + Encoding::try_from(crate::format::Encoding::PLAIN_DICTIONARY).unwrap(), Encoding::PLAIN_DICTIONARY ); assert_eq!( - Encoding::try_from(parquet::Encoding::RLE).unwrap(), + Encoding::try_from(crate::format::Encoding::RLE).unwrap(), Encoding::RLE ); assert_eq!( - Encoding::try_from(parquet::Encoding::BIT_PACKED).unwrap(), + Encoding::try_from(crate::format::Encoding::BIT_PACKED).unwrap(), Encoding::BIT_PACKED ); assert_eq!( - Encoding::try_from(parquet::Encoding::DELTA_BINARY_PACKED).unwrap(), + 
Encoding::try_from(crate::format::Encoding::DELTA_BINARY_PACKED).unwrap(), Encoding::DELTA_BINARY_PACKED ); assert_eq!( - Encoding::try_from(parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY).unwrap(), + Encoding::try_from(crate::format::Encoding::DELTA_LENGTH_BYTE_ARRAY).unwrap(), Encoding::DELTA_LENGTH_BYTE_ARRAY ); assert_eq!( - Encoding::try_from(parquet::Encoding::DELTA_BYTE_ARRAY).unwrap(), + Encoding::try_from(crate::format::Encoding::DELTA_BYTE_ARRAY).unwrap(), Encoding::DELTA_BYTE_ARRAY ); } #[test] fn test_into_encoding() { - assert_eq!(parquet::Encoding::PLAIN, Encoding::PLAIN.into()); + assert_eq!(crate::format::Encoding::PLAIN, Encoding::PLAIN.into()); assert_eq!( - parquet::Encoding::PLAIN_DICTIONARY, + crate::format::Encoding::PLAIN_DICTIONARY, Encoding::PLAIN_DICTIONARY.into() ); - assert_eq!(parquet::Encoding::RLE, Encoding::RLE.into()); - assert_eq!(parquet::Encoding::BIT_PACKED, Encoding::BIT_PACKED.into()); + assert_eq!(crate::format::Encoding::RLE, Encoding::RLE.into()); + assert_eq!( + crate::format::Encoding::BIT_PACKED, + Encoding::BIT_PACKED.into() + ); assert_eq!( - parquet::Encoding::DELTA_BINARY_PACKED, + crate::format::Encoding::DELTA_BINARY_PACKED, Encoding::DELTA_BINARY_PACKED.into() ); assert_eq!( - parquet::Encoding::DELTA_LENGTH_BYTE_ARRAY, + crate::format::Encoding::DELTA_LENGTH_BYTE_ARRAY, Encoding::DELTA_LENGTH_BYTE_ARRAY.into() ); assert_eq!( - parquet::Encoding::DELTA_BYTE_ARRAY, + crate::format::Encoding::DELTA_BYTE_ARRAY, Encoding::DELTA_BYTE_ARRAY.into() ); } @@ -2023,31 +2220,31 @@ mod tests { #[test] fn test_from_compression() { assert_eq!( - Compression::try_from(parquet::CompressionCodec::UNCOMPRESSED).unwrap(), + Compression::try_from(crate::format::CompressionCodec::UNCOMPRESSED).unwrap(), Compression::UNCOMPRESSED ); assert_eq!( - Compression::try_from(parquet::CompressionCodec::SNAPPY).unwrap(), + Compression::try_from(crate::format::CompressionCodec::SNAPPY).unwrap(), Compression::SNAPPY ); assert_eq!( - Compression::try_from(parquet::CompressionCodec::GZIP).unwrap(), + Compression::try_from(crate::format::CompressionCodec::GZIP).unwrap(), Compression::GZIP(Default::default()) ); assert_eq!( - Compression::try_from(parquet::CompressionCodec::LZO).unwrap(), + Compression::try_from(crate::format::CompressionCodec::LZO).unwrap(), Compression::LZO ); assert_eq!( - Compression::try_from(parquet::CompressionCodec::BROTLI).unwrap(), + Compression::try_from(crate::format::CompressionCodec::BROTLI).unwrap(), Compression::BROTLI(Default::default()) ); assert_eq!( - Compression::try_from(parquet::CompressionCodec::LZ4).unwrap(), + Compression::try_from(crate::format::CompressionCodec::LZ4).unwrap(), Compression::LZ4 ); assert_eq!( - Compression::try_from(parquet::CompressionCodec::ZSTD).unwrap(), + Compression::try_from(crate::format::CompressionCodec::ZSTD).unwrap(), Compression::ZSTD(Default::default()) ); } @@ -2055,25 +2252,31 @@ mod tests { #[test] fn test_into_compression() { assert_eq!( - parquet::CompressionCodec::UNCOMPRESSED, + crate::format::CompressionCodec::UNCOMPRESSED, Compression::UNCOMPRESSED.into() ); assert_eq!( - parquet::CompressionCodec::SNAPPY, + crate::format::CompressionCodec::SNAPPY, Compression::SNAPPY.into() ); assert_eq!( - parquet::CompressionCodec::GZIP, + crate::format::CompressionCodec::GZIP, Compression::GZIP(Default::default()).into() ); - assert_eq!(parquet::CompressionCodec::LZO, Compression::LZO.into()); assert_eq!( - parquet::CompressionCodec::BROTLI, + crate::format::CompressionCodec::LZO, + Compression::LZO.into() + 
); + assert_eq!( + crate::format::CompressionCodec::BROTLI, Compression::BROTLI(Default::default()).into() ); - assert_eq!(parquet::CompressionCodec::LZ4, Compression::LZ4.into()); assert_eq!( - parquet::CompressionCodec::ZSTD, + crate::format::CompressionCodec::LZ4, + Compression::LZ4.into() + ); + assert_eq!( + crate::format::CompressionCodec::ZSTD, Compression::ZSTD(Default::default()).into() ); } @@ -2089,33 +2292,39 @@ mod tests { #[test] fn test_from_page_type() { assert_eq!( - PageType::try_from(parquet::PageType::DATA_PAGE).unwrap(), + PageType::try_from(crate::format::PageType::DATA_PAGE).unwrap(), PageType::DATA_PAGE ); assert_eq!( - PageType::try_from(parquet::PageType::INDEX_PAGE).unwrap(), + PageType::try_from(crate::format::PageType::INDEX_PAGE).unwrap(), PageType::INDEX_PAGE ); assert_eq!( - PageType::try_from(parquet::PageType::DICTIONARY_PAGE).unwrap(), + PageType::try_from(crate::format::PageType::DICTIONARY_PAGE).unwrap(), PageType::DICTIONARY_PAGE ); assert_eq!( - PageType::try_from(parquet::PageType::DATA_PAGE_V2).unwrap(), + PageType::try_from(crate::format::PageType::DATA_PAGE_V2).unwrap(), PageType::DATA_PAGE_V2 ); } #[test] fn test_into_page_type() { - assert_eq!(parquet::PageType::DATA_PAGE, PageType::DATA_PAGE.into()); - assert_eq!(parquet::PageType::INDEX_PAGE, PageType::INDEX_PAGE.into()); assert_eq!( - parquet::PageType::DICTIONARY_PAGE, + crate::format::PageType::DATA_PAGE, + PageType::DATA_PAGE.into() + ); + assert_eq!( + crate::format::PageType::INDEX_PAGE, + PageType::INDEX_PAGE.into() + ); + assert_eq!( + crate::format::PageType::DICTIONARY_PAGE, PageType::DICTIONARY_PAGE.into() ); assert_eq!( - parquet::PageType::DATA_PAGE_V2, + crate::format::PageType::DATA_PAGE_V2, PageType::DATA_PAGE_V2.into() ); } @@ -2208,27 +2417,27 @@ mod tests { LogicalType::Date, LogicalType::Time { is_adjusted_to_u_t_c: false, - unit: TimeUnit::MILLIS(Default::default()), + unit: TimeUnit::MILLIS, }, LogicalType::Time { is_adjusted_to_u_t_c: false, - unit: TimeUnit::MICROS(Default::default()), + unit: TimeUnit::MICROS, }, LogicalType::Time { is_adjusted_to_u_t_c: true, - unit: TimeUnit::NANOS(Default::default()), + unit: TimeUnit::NANOS, }, LogicalType::Timestamp { is_adjusted_to_u_t_c: false, - unit: TimeUnit::MILLIS(Default::default()), + unit: TimeUnit::MILLIS, }, LogicalType::Timestamp { is_adjusted_to_u_t_c: false, - unit: TimeUnit::MICROS(Default::default()), + unit: TimeUnit::MICROS, }, LogicalType::Timestamp { is_adjusted_to_u_t_c: true, - unit: TimeUnit::NANOS(Default::default()), + unit: TimeUnit::NANOS, }, LogicalType::Float16, ]; diff --git a/parquet/src/bin/parquet-index.rs b/parquet/src/bin/parquet-index.rs index 1a9b74dd78fb..e91f5e5a9f17 100644 --- a/parquet/src/bin/parquet-index.rs +++ b/parquet/src/bin/parquet-index.rs @@ -37,10 +37,9 @@ use clap::Parser; use parquet::errors::{ParquetError, Result}; use parquet::file::page_index::index::{Index, PageIndex}; -use parquet::file::page_index::offset_index::OffsetIndexMetaData; +use parquet::file::page_index::offset_index::{OffsetIndexMetaData, PageLocation}; use parquet::file::reader::{FileReader, SerializedFileReader}; use parquet::file::serialized_reader::ReadOptionsBuilder; -use parquet::format::PageLocation; use std::fs::File; #[derive(Debug, Parser)] diff --git a/parquet/src/bloom_filter/mod.rs b/parquet/src/bloom_filter/mod.rs index 384a4a10486e..f7dc098bd0bc 100644 --- a/parquet/src/bloom_filter/mod.rs +++ b/parquet/src/bloom_filter/mod.rs @@ -72,14 +72,11 @@ //! 
[sbbf-paper]: https://arxiv.org/pdf/2101.01719 //! [bf-formulae]: http://tfk.mit.edu/pdf/bloom.pdf +use crate::basic::{BloomFilterAlgorithm, BloomFilterCompression, BloomFilterHash}; use crate::data_type::AsBytes; use crate::errors::ParquetError; use crate::file::metadata::ColumnChunkMetaData; use crate::file::reader::ChunkReader; -use crate::format::{ - BloomFilterAlgorithm, BloomFilterCompression, BloomFilterHash, BloomFilterHeader, - SplitBlockAlgorithm, Uncompressed, XxHash, -}; use crate::thrift::{TCompactSliceInputProtocol, TSerializable}; use bytes::Bytes; use std::io::Write; @@ -98,6 +95,43 @@ const SALT: [u32; 8] = [ 0x5c6bfb31_u32, ]; +/// The Bloom filter header is stored at the beginning of the Bloom filter data of each column +/// and is followed by its bitset. +/// +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct BloomFilterHeader { + /// The size of the bitset in bytes + pub num_bytes: i32, + /// The algorithm for setting bits. + pub algorithm: BloomFilterAlgorithm, + /// The hash function used for the Bloom filter. + pub hash: BloomFilterHash, + /// The compression used in the Bloom filter. + pub compression: BloomFilterCompression, +} + +impl From<crate::format::BloomFilterHeader> for BloomFilterHeader { + fn from(value: crate::format::BloomFilterHeader) -> Self { + Self { + num_bytes: value.num_bytes, + algorithm: value.algorithm.into(), + hash: value.hash.into(), + compression: value.compression.into(), + } + } +} + +impl From<BloomFilterHeader> for crate::format::BloomFilterHeader { + fn from(value: BloomFilterHeader) -> Self { + Self { + num_bytes: value.num_bytes, + algorithm: value.algorithm.into(), + hash: value.hash.into(), + compression: value.compression.into(), + } + } +} + /// Each block is 256 bits, broken up into eight contiguous "words", each consisting of 32 bits. /// Each word is thought of as an array of bits; each bit is either "set" or "not set". #[derive(Debug, Copy, Clone)] @@ -195,9 +229,9 @@ pub(crate) fn read_bloom_filter_header_and_length( ) -> Result<(BloomFilterHeader, u64), ParquetError> { let total_length = buffer.len(); let mut prot = TCompactSliceInputProtocol::new(buffer.as_ref()); - let header = BloomFilterHeader::read_from_in_protocol(&mut prot) + let header = crate::format::BloomFilterHeader::read_from_in_protocol(&mut prot) .map_err(|e| ParquetError::General(format!("Could not read bloom filter header: {e}")))?; - Ok((header, (total_length - prot.as_slice().len()) as u64)) + Ok((header.into(), (total_length - prot.as_slice().len()) as u64)) } pub(crate) const BITSET_MIN_LENGTH: usize = 32; @@ -262,7 +296,7 @@ impl Sbbf { /// must remember to flush the writer.
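+    /// For illustration, with `sbbf: &Sbbf` in scope, the conversion used in the body below is simply +    /// `let header: crate::format::BloomFilterHeader = sbbf.header().into();`, relying on the +    /// `From` impls defined for [`BloomFilterHeader`] above.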
pub(crate) fn write<W: Write>(&self, mut writer: W) -> Result<(), ParquetError> { let mut protocol = TCompactOutputProtocol::new(&mut writer); - let header = self.header(); + let header: crate::format::BloomFilterHeader = self.header().into(); header.write_to_out_protocol(&mut protocol).map_err(|e| { ParquetError::General(format!("Could not write bloom filter header: {e}")) })?; @@ -305,9 +339,9 @@ impl Sbbf { BloomFilterHeader { // 8 i32 per block, 4 bytes per i32 num_bytes: self.0.len() as i32 * 4 * 8, - algorithm: BloomFilterAlgorithm::BLOCK(SplitBlockAlgorithm {}), - hash: BloomFilterHash::XXHASH(XxHash {}), - compression: BloomFilterCompression::UNCOMPRESSED(Uncompressed {}), + algorithm: BloomFilterAlgorithm::BLOCK, + hash: BloomFilterHash::XXHASH, + compression: BloomFilterCompression::UNCOMPRESSED, } } @@ -333,17 +367,17 @@ impl Sbbf { chunk_read_bloom_filter_header_and_offset(offset, buffer.clone())?; match header.algorithm { - BloomFilterAlgorithm::BLOCK(_) => { + BloomFilterAlgorithm::BLOCK => { // this match exists to future proof the singleton algorithm enum } } match header.compression { - BloomFilterCompression::UNCOMPRESSED(_) => { + BloomFilterCompression::UNCOMPRESSED => { // this match exists to future proof the singleton compression enum } } match header.hash { - BloomFilterHash::XXHASH(_) => { + BloomFilterHash::XXHASH => { // this match exists to future proof the singleton hash enum } } @@ -471,15 +505,9 @@ mod tests { read_length, ) = read_bloom_filter_header_and_length(Bytes::copy_from_slice(buffer)).unwrap(); assert_eq!(read_length, 15); - assert_eq!( - algorithm, - BloomFilterAlgorithm::BLOCK(SplitBlockAlgorithm {}) - ); - assert_eq!( - compression, - BloomFilterCompression::UNCOMPRESSED(Uncompressed {}) - ); - assert_eq!(hash, BloomFilterHash::XXHASH(XxHash {})); + assert_eq!(algorithm, BloomFilterAlgorithm::BLOCK); + assert_eq!(compression, BloomFilterCompression::UNCOMPRESSED); + assert_eq!(hash, BloomFilterHash::XXHASH); assert_eq!(num_bytes, 32_i32); assert_eq!(20, SBBF_HEADER_SIZE_ESTIMATE); } diff --git a/parquet/src/column/writer/mod.rs b/parquet/src/column/writer/mod.rs index 9374e226b87f..1e6f4f6f0706 100644 --- a/parquet/src/column/writer/mod.rs +++ b/parquet/src/column/writer/mod.rs @@ -21,11 +21,14 @@ use bytes::Bytes; use half::f16; use crate::bloom_filter::Sbbf; -use crate::format::{BoundaryOrder, ColumnIndex, OffsetIndex}; +use crate::file::page_index::index::Index; +use crate::file::page_index::offset_index::OffsetIndexMetaData; use std::collections::{BTreeSet, VecDeque}; use std::str; -use crate::basic::{Compression, ConvertedType, Encoding, LogicalType, PageType, Type}; +use crate::basic::{ + BoundaryOrder, Compression, ConvertedType, Encoding, LogicalType, PageType, Type, +}; use crate::column::page::{CompressedPage, Page, PageWriteSpec, PageWriter}; use crate::column::writer::encoder::{ColumnValueEncoder, ColumnValueEncoderImpl, ColumnValues}; use crate::compression::{create_codec, Codec, CodecOptionsBuilder}; @@ -185,9 +188,9 @@ pub struct ColumnCloseResult { /// Optional bloom filter for this column pub bloom_filter: Option<Sbbf>, /// Optional column index, for filtering - pub column_index: Option<ColumnIndex>, + pub column_index: Option<Index>, /// Optional offset index, identifying page locations - pub offset_index: Option<OffsetIndex>, + pub offset_index: Option<OffsetIndexMetaData>, } // Metrics per page @@ -384,7 +387,7 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> { } // Disable column_index_builder if not collecting page statistics.
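+        // Note: the builder now receives the column's physical type so that +        // `ColumnIndexBuilder::build` can assemble the correctly typed `Index` +        // variant when the column is closed.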
- let mut column_index_builder = ColumnIndexBuilder::new(); + let mut column_index_builder = ColumnIndexBuilder::new(descr.physical_type()); if statistics_enabled != EnabledStatistics::Page { column_index_builder.to_invalid() } @@ -615,12 +618,12 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> { }; self.column_index_builder.set_boundary_order(boundary_order); - let column_index = self - .column_index_builder - .valid() - .then(|| self.column_index_builder.build_to_thrift()); + let column_index = match self.column_index_builder.valid() { + true => Some(self.column_index_builder.build()?), + false => None, + }; - let offset_index = self.offset_index_builder.map(|b| b.build_to_thrift()); + let offset_index = self.offset_index_builder.map(|b| b.build()); Ok(ColumnCloseResult { bytes_written: self.column_metrics.total_bytes_written, @@ -2939,19 +2942,29 @@ mod tests { let r = writer.close().unwrap(); assert!(r.column_index.is_some()); let col_idx = r.column_index.unwrap(); + let col_idx = match col_idx { + Index::INT32(col_idx) => col_idx, + _ => panic!("wrong stats type"), + }; // null_pages should be true for page 0 - assert!(col_idx.null_pages[0]); + assert!(col_idx.indexes[0].is_null_page()); // min and max should be empty byte arrays - assert_eq!(col_idx.min_values[0].len(), 0); - assert_eq!(col_idx.max_values[0].len(), 0); + assert!(col_idx.indexes[0].min().is_none()); + assert!(col_idx.indexes[0].max().is_none()); // null_counts should be defined and be 4 for page 0 - assert!(col_idx.null_counts.is_some()); - assert_eq!(col_idx.null_counts.as_ref().unwrap()[0], 4); + assert!(col_idx.indexes[0].null_count().is_some()); + assert_eq!(col_idx.indexes[0].null_count().unwrap(), 4); // there is no repetition so rep histogram should be absent - assert!(col_idx.repetition_level_histograms.is_none()); + assert!(col_idx.indexes[0].repetition_level_histogram().is_none()); // definition_level_histogram should be present and should be 0:4, 1:0 - assert!(col_idx.definition_level_histograms.is_some()); - assert_eq!(col_idx.definition_level_histograms.unwrap(), &[4, 0]); + assert!(col_idx.indexes[0].definition_level_histogram().is_some()); + assert_eq!( + col_idx.indexes[0] + .definition_level_histogram() + .unwrap() + .values(), + &[4, 0] + ); } #[test] @@ -2974,12 +2987,16 @@ mod tests { assert_eq!(8, r.rows_written); // column index - assert_eq!(2, column_index.null_pages.len()); + let column_index = match column_index { + Index::INT32(column_index) => column_index, + _ => panic!("wrong stats type"), + }; + assert_eq!(2, column_index.indexes.len()); assert_eq!(2, offset_index.page_locations.len()); assert_eq!(BoundaryOrder::UNORDERED, column_index.boundary_order); for idx in 0..2 { - assert!(!column_index.null_pages[idx]); - assert_eq!(0, column_index.null_counts.as_ref().unwrap()[idx]); + assert!(!column_index.indexes[idx].is_null_page()); + assert_eq!(0, *column_index.indexes[idx].null_count.as_ref().unwrap()); } if let Some(stats) = r.metadata.statistics() { @@ -2989,14 +3006,8 @@ mod tests { // first page is [1,2,3,4] // second page is [-5,2,4,8] // note that we don't increment here, as this is a non BinaryArray type. 
- assert_eq!( - stats.min_bytes_opt(), - Some(column_index.min_values[1].as_slice()) - ); - assert_eq!( - stats.max_bytes_opt(), - column_index.max_values.get(1).map(Vec::as_slice) - ); + assert_eq!(stats.min_opt(), column_index.indexes[1].min()); + assert_eq!(stats.max_opt(), column_index.indexes[1].max()); } else { panic!("expecting Statistics::Int32"); } @@ -3036,37 +3047,36 @@ mod tests { let column_index = r.column_index.unwrap(); let offset_index = r.offset_index.unwrap(); + let column_index = match column_index { + Index::FIXED_LEN_BYTE_ARRAY(column_index) => column_index, + _ => panic!("wrong stats type"), + }; + assert_eq!(3, r.rows_written); // column index - assert_eq!(1, column_index.null_pages.len()); + assert_eq!(1, column_index.indexes.len()); assert_eq!(1, offset_index.page_locations.len()); assert_eq!(BoundaryOrder::ASCENDING, column_index.boundary_order); - assert!(!column_index.null_pages[0]); - assert_eq!(0, column_index.null_counts.as_ref().unwrap()[0]); + assert!(!column_index.indexes[0].is_null_page()); + assert_eq!(Some(0), column_index.indexes[0].null_count()); if let Some(stats) = r.metadata.statistics() { assert_eq!(stats.null_count_opt(), Some(0)); assert_eq!(stats.distinct_count_opt(), None); if let Statistics::FixedLenByteArray(stats) = stats { - let column_index_min_value = &column_index.min_values[0]; - let column_index_max_value = &column_index.max_values[0]; + let column_index_min_value = column_index.indexes[0].min_bytes().unwrap(); + let column_index_max_value = column_index.indexes[0].max_bytes().unwrap(); // Column index stats are truncated, while the column chunk's aren't. - assert_ne!( - stats.min_bytes_opt(), - Some(column_index_min_value.as_slice()) - ); - assert_ne!( - stats.max_bytes_opt(), - Some(column_index_max_value.as_slice()) - ); + assert_ne!(stats.min_bytes_opt().unwrap(), column_index_min_value); + assert_ne!(stats.max_bytes_opt().unwrap(), column_index_max_value); assert_eq!( column_index_min_value.len(), DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH.unwrap() ); - assert_eq!(column_index_min_value.as_slice(), &[97_u8; 64]); + assert_eq!(column_index_min_value, &[97_u8; 64]); assert_eq!( column_index_max_value.len(), DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH.unwrap() @@ -3108,27 +3118,32 @@ mod tests { let column_index = r.column_index.unwrap(); let offset_index = r.offset_index.unwrap(); + let column_index = match column_index { + Index::FIXED_LEN_BYTE_ARRAY(column_index) => column_index, + _ => panic!("wrong stats type"), + }; + assert_eq!(1, r.rows_written); // column index - assert_eq!(1, column_index.null_pages.len()); + assert_eq!(1, column_index.indexes.len()); assert_eq!(1, offset_index.page_locations.len()); assert_eq!(BoundaryOrder::ASCENDING, column_index.boundary_order); - assert!(!column_index.null_pages[0]); - assert_eq!(0, column_index.null_counts.as_ref().unwrap()[0]); + assert!(!column_index.indexes[0].is_null_page()); + assert_eq!(Some(0), column_index.indexes[0].null_count()); if let Some(stats) = r.metadata.statistics() { assert_eq!(stats.null_count_opt(), Some(0)); assert_eq!(stats.distinct_count_opt(), None); if let Statistics::FixedLenByteArray(_stats) = stats { - let column_index_min_value = &column_index.min_values[0]; - let column_index_max_value = &column_index.max_values[0]; + let column_index_min_value = column_index.indexes[0].min_bytes().unwrap(); + let column_index_max_value = column_index.indexes[0].max_bytes().unwrap(); assert_eq!(column_index_min_value.len(), 1); assert_eq!(column_index_max_value.len(), 1); - 
assert_eq!("B".as_bytes(), column_index_min_value.as_slice()); - assert_eq!("C".as_bytes(), column_index_max_value.as_slice()); + assert_eq!("B".as_bytes(), column_index_min_value); + assert_eq!("C".as_bytes(), column_index_max_value); assert_ne!(column_index_min_value, stats.min_bytes_opt().unwrap()); assert_ne!(column_index_max_value, stats.max_bytes_opt().unwrap()); @@ -3158,8 +3173,12 @@ mod tests { // stats should still be written // ensure bytes weren't truncated for column index let column_index = r.column_index.unwrap(); - let column_index_min_bytes = column_index.min_values[0].as_slice(); - let column_index_max_bytes = column_index.max_values[0].as_slice(); + let column_index = match column_index { + Index::FIXED_LEN_BYTE_ARRAY(column_index) => column_index, + _ => panic!("wrong stats type"), + }; + let column_index_min_bytes = column_index.indexes[0].min_bytes().unwrap(); + let column_index_max_bytes = column_index.indexes[0].min_bytes().unwrap(); assert_eq!(expected_value, column_index_min_bytes); assert_eq!(expected_value, column_index_max_bytes); @@ -3197,8 +3216,12 @@ mod tests { // stats should still be written // ensure bytes weren't truncated for column index let column_index = r.column_index.unwrap(); - let column_index_min_bytes = column_index.min_values[0].as_slice(); - let column_index_max_bytes = column_index.max_values[0].as_slice(); + let column_index = match column_index { + Index::FIXED_LEN_BYTE_ARRAY(column_index) => column_index, + _ => panic!("wrong stats type"), + }; + let column_index_min_bytes = column_index.indexes[0].min_bytes().unwrap(); + let column_index_max_bytes = column_index.indexes[0].min_bytes().unwrap(); assert_eq!(expected_value, column_index_min_bytes); assert_eq!(expected_value, column_index_max_bytes); @@ -3678,8 +3701,11 @@ mod tests { &[Some(-5), Some(11)], ], )?; - let boundary_order = column_close_result.column_index.unwrap().boundary_order; - assert_eq!(boundary_order, BoundaryOrder::ASCENDING); + let boundary_order = column_close_result + .column_index + .unwrap() + .get_boundary_order(); + assert_eq!(boundary_order, Some(BoundaryOrder::ASCENDING)); // min max both descending let column_close_result = write_multiple_pages::( @@ -3691,34 +3717,49 @@ mod tests { &[Some(-5), Some(0)], ], )?; - let boundary_order = column_close_result.column_index.unwrap().boundary_order; - assert_eq!(boundary_order, BoundaryOrder::DESCENDING); + let boundary_order = column_close_result + .column_index + .unwrap() + .get_boundary_order(); + assert_eq!(boundary_order, Some(BoundaryOrder::DESCENDING)); // min max both equal let column_close_result = write_multiple_pages::( &descr, &[&[Some(10), Some(11)], &[None], &[Some(10), Some(11)]], )?; - let boundary_order = column_close_result.column_index.unwrap().boundary_order; - assert_eq!(boundary_order, BoundaryOrder::ASCENDING); + let boundary_order = column_close_result + .column_index + .unwrap() + .get_boundary_order(); + assert_eq!(boundary_order, Some(BoundaryOrder::ASCENDING)); // only nulls let column_close_result = write_multiple_pages::(&descr, &[&[None], &[None], &[None]])?; - let boundary_order = column_close_result.column_index.unwrap().boundary_order; - assert_eq!(boundary_order, BoundaryOrder::ASCENDING); + let boundary_order = column_close_result + .column_index + .unwrap() + .get_boundary_order(); + assert_eq!(boundary_order, Some(BoundaryOrder::ASCENDING)); // one page let column_close_result = write_multiple_pages::(&descr, &[&[Some(-10), Some(10)]])?; - let boundary_order = 
column_close_result.column_index.unwrap().boundary_order; - assert_eq!(boundary_order, BoundaryOrder::ASCENDING); + let boundary_order = column_close_result + .column_index + .unwrap() + .get_boundary_order(); + assert_eq!(boundary_order, Some(BoundaryOrder::ASCENDING)); // one non-null page let column_close_result = write_multiple_pages::<Int32Type>(&descr, &[&[Some(-10), Some(10)], &[None]])?; - let boundary_order = column_close_result.column_index.unwrap().boundary_order; - assert_eq!(boundary_order, BoundaryOrder::ASCENDING); + let boundary_order = column_close_result + .column_index + .unwrap() + .get_boundary_order(); + assert_eq!(boundary_order, Some(BoundaryOrder::ASCENDING)); // min max both unordered let column_close_result = write_multiple_pages::<Int32Type>( &descr, &[ &[Some(-5), Some(11)], &[Some(-5), Some(11)], &[Some(-5), Some(11)], &[Some(-5), Some(0)], ], )?; - let boundary_order = column_close_result.column_index.unwrap().boundary_order; - assert_eq!(boundary_order, BoundaryOrder::UNORDERED); + let boundary_order = column_close_result + .column_index + .unwrap() + .get_boundary_order(); + assert_eq!(boundary_order, Some(BoundaryOrder::UNORDERED)); // min max both ordered in different orders let column_close_result = write_multiple_pages::<Int32Type>( &descr, &[ &[Some(1), Some(9)], &[Some(2), Some(8)], &[Some(3), Some(7)], ], )?; - let boundary_order = column_close_result.column_index.unwrap().boundary_order; - assert_eq!(boundary_order, BoundaryOrder::UNORDERED); + let boundary_order = column_close_result + .column_index + .unwrap() + .get_boundary_order(); + assert_eq!(boundary_order, Some(BoundaryOrder::UNORDERED)); Ok(()) } @@ -3781,14 +3828,20 @@ // f16 descending let column_close_result = write_multiple_pages::<FixedLenByteArrayType>(&f16_descr, values)?; - let boundary_order = column_close_result.column_index.unwrap().boundary_order; - assert_eq!(boundary_order, BoundaryOrder::DESCENDING); + let boundary_order = column_close_result + .column_index + .unwrap() + .get_boundary_order(); + assert_eq!(boundary_order, Some(BoundaryOrder::DESCENDING)); // same bytes, but fba unordered let column_close_result = write_multiple_pages::<FixedLenByteArrayType>(&fba_descr, values)?; - let boundary_order = column_close_result.column_index.unwrap().boundary_order; - assert_eq!(boundary_order, BoundaryOrder::UNORDERED); + let boundary_order = column_close_result + .column_index + .unwrap() + .get_boundary_order(); + assert_eq!(boundary_order, Some(BoundaryOrder::UNORDERED)); Ok(()) } diff --git a/parquet/src/file/metadata/memory.rs b/parquet/src/file/metadata/memory.rs index ad452267901a..0b8d3b336fc0 100644 --- a/parquet/src/file/metadata/memory.rs +++ b/parquet/src/file/metadata/memory.rs @@ -18,14 +18,15 @@ //! Memory calculations for [`ParquetMetadata::memory_size`] //! //!
[`ParquetMetadata::memory_size`]: crate::file::metadata::ParquetMetaData::memory_size -use crate::basic::{ColumnOrder, Compression, Encoding, PageType}; +use crate::basic::{BoundaryOrder, ColumnOrder, Compression, Encoding, PageType}; use crate::data_type::private::ParquetValueType; -use crate::file::metadata::{ColumnChunkMetaData, FileMetaData, KeyValue, RowGroupMetaData}; +use crate::file::metadata::{ + ColumnChunkMetaData, FileMetaData, KeyValue, RowGroupMetaData, SortingColumn, +}; use crate::file::page_encoding_stats::PageEncodingStats; use crate::file::page_index::index::{Index, NativeIndex, PageIndex}; -use crate::file::page_index::offset_index::OffsetIndexMetaData; +use crate::file::page_index::offset_index::{OffsetIndexMetaData, PageLocation}; use crate::file::statistics::{Statistics, ValueStatistics}; -use crate::format::{BoundaryOrder, PageLocation, SortingColumn}; use std::sync::Arc; /// Trait for calculating the size of various containers diff --git a/parquet/src/file/metadata/mod.rs b/parquet/src/file/metadata/mod.rs index 04129c6aa482..193b70d9dd4a 100644 --- a/parquet/src/file/metadata/mod.rs +++ b/parquet/src/file/metadata/mod.rs @@ -101,25 +101,32 @@ use crate::encryption::{ decrypt::FileDecryptor, modules::{create_module_aad, ModuleType}, }; -use crate::errors::{ParquetError, Result}; #[cfg(feature = "encryption")] use crate::file::column_crypto_metadata::{self, ColumnCryptoMetaData}; pub(crate) use crate::file::metadata::memory::HeapSize; -use crate::file::page_encoding_stats::{self, PageEncodingStats}; -use crate::file::page_index::index::Index; -use crate::file::page_index::offset_index::OffsetIndexMetaData; -use crate::file::statistics::{self, Statistics}; -use crate::format::ColumnCryptoMetaData as TColumnCryptoMetaData; -use crate::format::{ - BoundaryOrder, ColumnChunk, ColumnIndex, ColumnMetaData, OffsetIndex, PageLocation, RowGroup, - SizeStatistics, SortingColumn, +use crate::file::page_index::index::{Index, NativeIndex}; +use crate::file::{ + page_encoding_stats::{self, PageEncodingStats}, + page_index::offset_index::PageLocation, +}; +use crate::file::{ + page_index::index::PageIndex, + statistics::{self, Statistics}, }; +use crate::format::ColumnCryptoMetaData as TColumnCryptoMetaData; use crate::schema::types::{ ColumnDescPtr, ColumnDescriptor, ColumnPath, SchemaDescPtr, SchemaDescriptor, Type as SchemaType, }; #[cfg(feature = "encryption")] use crate::thrift::{TCompactSliceInputProtocol, TSerializable}; +use crate::{ + basic::BoundaryOrder, + errors::{ParquetError, Result}, +}; +use crate::{ + data_type::private::ParquetValueType, file::page_index::offset_index::OffsetIndexMetaData, +}; pub use reader::{FooterTail, ParquetMetaDataReader}; use std::ops::Range; use std::sync::Arc; @@ -141,6 +148,7 @@ pub(crate) use writer::ThriftMetadataWriter; /// column in the third row group of the parquet file. /// /// [PageIndex documentation]: https://github.com/apache/parquet-format/blob/master/PageIndex.md +/// [`ColumnIndex`]: crate::format::ColumnIndex pub type ParquetColumnIndex = Vec<Vec<Index>>; /// [`OffsetIndexMetaData`] for each data page of each row group of each column /// /// `parquet_offset_index[2][3][4]` holds the [`PageLocation`] for the fifth page /// of the forth column in the third row group of the /// parquet file. /// /// This is a vector of vectors indexed by the page number of column /// `column_number` of row group `row_group_number`.
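+/// For example, `parquet_offset_index[2][3]` would hold the [`OffsetIndexMetaData`] +/// for the fourth column in the third row group of the file (an illustrative +/// reading of the indexing described above).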
/// /// [PageIndex documentation]: https://github.com/apache/parquet-format/blob/master/PageIndex.md +/// [`OffsetIndex`]: crate::format::OffsetIndex pub type ParquetOffsetIndex = Vec<Vec<OffsetIndexMetaData>>; /// Parsed metadata for a single Parquet file @@ -415,7 +424,26 @@ impl From<ParquetMetaData> for ParquetMetaDataBuilder { } /// A key-value pair for [`FileMetaData`]. -pub type KeyValue = crate::format::KeyValue; +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct KeyValue { + /// The key. + pub key: String, + /// An optional value. + pub value: Option<String>, +} + +impl KeyValue { + /// Create a new key value pair + pub fn new<F2>(key: String, value: F2) -> KeyValue + where + F2: Into<Option<String>>, + { + KeyValue { + key, + value: value.into(), + } + } +} /// Reference counted pointer for [`FileMetaData`]. pub type FileMetaDataPtr = Arc<FileMetaData>; @@ -518,6 +546,38 @@ impl FileMetaData { } } +/// Sort order within a RowGroup of a leaf column +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct SortingColumn { + /// The ordinal position of the column (in this row group) + pub column_idx: i32, + /// If true, indicates this column is sorted in descending order. + pub descending: bool, + /// If true, nulls will come before non-null values, otherwise, + /// nulls go at the end. + pub nulls_first: bool, +} + +impl From<&crate::format::SortingColumn> for SortingColumn { + fn from(value: &crate::format::SortingColumn) -> Self { + Self { + column_idx: value.column_idx, + descending: value.descending, + nulls_first: value.nulls_first, + } + } +} + +impl From<&SortingColumn> for crate::format::SortingColumn { + fn from(value: &SortingColumn) -> Self { + Self { + column_idx: value.column_idx, + descending: value.descending, + nulls_first: value.nulls_first, + } + } +} + /// Reference counted pointer for [`RowGroupMetaData`]. pub type RowGroupMetaDataPtr = Arc<RowGroupMetaData>; @@ -613,7 +673,7 @@ impl RowGroupMetaData { #[cfg(feature = "encryption")] fn from_encrypted_thrift( schema_descr: SchemaDescPtr, - mut rg: crate::format::RowGroup, decryptor: Option<&FileDecryptor>, ) -> Result<RowGroupMetaData> { if schema_descr.num_columns() != rg.columns.len() { return Err(general_err!( "Column count mismatch. Schema has {} columns while Row Group has {}", schema_descr.num_columns(), rg.columns.len() )); } ... })?; let mut prot = TCompactSliceInputProtocol::new(decrypted_cc_buf.as_slice()); - c.meta_data = Some(ColumnMetaData::read_from_in_protocol(&mut prot)?); + c.meta_data = Some(crate::format::ColumnMetaData::read_from_in_protocol( + &mut prot, + )?); } columns.push(ColumnChunkMetaData::from_thrift(d.clone(), c)?); } - let sorting_columns = rg.sorting_columns; + let sorting_columns = rg.sorting_columns.map(|scs| { + scs.iter() + .map(|sc| sc.into()) + .collect::<Vec<SortingColumn>>() + }); Ok(RowGroupMetaData { columns, num_rows, ... }) } /// Method to convert from Thrift. - pub fn from_thrift(schema_descr: SchemaDescPtr, mut rg: RowGroup) -> Result<RowGroupMetaData> { + pub fn from_thrift( + schema_descr: SchemaDescPtr, + mut rg: crate::format::RowGroup, + ) -> Result<RowGroupMetaData> { if schema_descr.num_columns() != rg.columns.len() { return Err(general_err!( "Column count mismatch. Schema has {} columns while Row Group has {}", schema_descr.num_columns(), rg.columns.len() )); } let total_byte_size = rg.total_byte_size; let num_rows = rg.num_rows; let mut columns = vec![]; for (c, d) in rg.columns.drain(0..).zip(schema_descr.columns()) { columns.push(ColumnChunkMetaData::from_thrift(d.clone(), c)?); } - let sorting_columns = rg.sorting_columns; + let sorting_columns = rg.sorting_columns.map(|scs| { + scs.iter() + .map(|sc| sc.into()) + .collect::<Vec<SortingColumn>>() + }); Ok(RowGroupMetaData { columns, num_rows, ... }) } /// Method to convert to Thrift.
- pub fn to_thrift(&self) -> RowGroup { - RowGroup { + pub fn to_thrift(&self) -> crate::format::RowGroup { + let sorting_columns = self.sorting_columns().map(|scs| { + scs.iter() + .map(|sc| sc.into()) + .collect::<Vec<crate::format::SortingColumn>>() + }); + crate::format::RowGroup { columns: self.columns().iter().map(|v| v.to_thrift()).collect(), total_byte_size: self.total_byte_size, num_rows: self.num_rows, - sorting_columns: self.sorting_columns().cloned(), + sorting_columns, file_offset: self.file_offset(), total_compressed_size: Some(self.compressed_size()), ordinal: self.ordinal, @@ -1143,11 +1221,14 @@ impl ColumnChunkMetaData { } /// Method to convert from Thrift. - pub fn from_thrift(column_descr: ColumnDescPtr, cc: ColumnChunk) -> Result<Self> { + pub fn from_thrift( + column_descr: ColumnDescPtr, + cc: crate::format::ColumnChunk, + ) -> Result<Self> { if cc.meta_data.is_none() { return Err(general_err!("Expected to have column metadata")); } - let mut col_metadata: ColumnMetaData = cc.meta_data.unwrap(); + let mut col_metadata: crate::format::ColumnMetaData = cc.meta_data.unwrap(); let column_type = Type::try_from(col_metadata.type_)?; let encodings = col_metadata .encodings @@ -1233,10 +1314,10 @@ } /// Method to convert to Thrift. - pub fn to_thrift(&self) -> ColumnChunk { + pub fn to_thrift(&self) -> crate::format::ColumnChunk { let column_metadata = self.to_column_metadata_thrift(); - ColumnChunk { + crate::format::ColumnChunk { file_path: self.file_path().map(|s| s.to_owned()), file_offset: self.file_offset, meta_data: Some(column_metadata), @@ -1250,7 +1331,7 @@ } /// Method to convert to Thrift `ColumnMetaData` - pub fn to_column_metadata_thrift(&self) -> ColumnMetaData { + pub fn to_column_metadata_thrift(&self) -> crate::format::ColumnMetaData { let size_statistics = if self.unencoded_byte_array_data_bytes.is_some() || self.repetition_level_histogram.is_some() || self.definition_level_histogram.is_some() { ... .as_ref() .map(|hist| hist.clone().into_inner()); - Some(SizeStatistics { + Some(crate::format::SizeStatistics { unencoded_byte_array_data_bytes: self.unencoded_byte_array_data_bytes, repetition_level_histogram, definition_level_histogram, }) } else { None }; - ColumnMetaData { + crate::format::ColumnMetaData { type_: self.column_type().into(), encodings: self.encodings().iter().map(|&v| v.into()).collect(), path_in_schema: self.column_path().as_ref().to_vec(), @@ -1517,7 +1598,9 @@ impl ColumnChunkMetaDataBuilder { /// Builder for Parquet [`ColumnIndex`], part of the Parquet [PageIndex] /// /// [PageIndex]: https://github.com/apache/parquet-format/blob/master/PageIndex.md +/// [`ColumnIndex`]: crate::format::ColumnIndex pub struct ColumnIndexBuilder { + column_type: Type, null_pages: Vec<bool>, min_values: Vec<Vec<u8>>, max_values: Vec<Vec<u8>>, @@ -1537,16 +1620,11 @@ pub struct ColumnIndexBuilder { valid: bool, } -impl Default for ColumnIndexBuilder { - fn default() -> Self { - Self::new() - } -} impl ColumnIndexBuilder { /// Creates a new column index builder. - pub fn new() -> Self { + pub fn new(column_type: Type) -> Self { ColumnIndexBuilder { + column_type, null_pages: Vec::new(), min_values: Vec::new(), max_values: Vec::new(), @@ -1574,6 +1652,8 @@ impl ColumnIndexBuilder { /// Append the given page-level histograms to the [`ColumnIndex`] histograms. /// Does nothing if the `ColumnIndexBuilder` is not in the `valid` state.
+ /// + /// [`ColumnIndex`]: crate::format::ColumnIndex pub fn append_histograms( &mut self, repetition_level_histogram: &Option<LevelHistogram>, @@ -1612,17 +1692,151 @@ impl ColumnIndexBuilder { /// Build and get the thrift metadata of column index /// /// Note: callers should check [`Self::valid`] before calling this method - pub fn build_to_thrift(self) -> ColumnIndex { - ColumnIndex::new( + pub fn build_to_thrift(self) -> crate::format::ColumnIndex { + crate::format::ColumnIndex::new( self.null_pages, self.min_values, self.max_values, - self.boundary_order, + self.boundary_order.into(), self.null_counts, self.repetition_level_histograms, self.definition_level_histograms, ) } + + /// Build and get the column index + /// + /// Note: callers should check [`Self::valid`] before calling this method + pub fn build(self) -> Result<Index> { + Ok(match self.column_type { + Type::BOOLEAN => { + let (indexes, boundary_order) = self.build_page_index()?; + Index::BOOLEAN(NativeIndex { + indexes, + boundary_order, + }) + } + Type::INT32 => { + let (indexes, boundary_order) = self.build_page_index()?; + Index::INT32(NativeIndex { + indexes, + boundary_order, + }) + } + Type::INT64 => { + let (indexes, boundary_order) = self.build_page_index()?; + Index::INT64(NativeIndex { + indexes, + boundary_order, + }) + } + Type::INT96 => { + let (indexes, boundary_order) = self.build_page_index()?; + Index::INT96(NativeIndex { + indexes, + boundary_order, + }) + } + Type::FLOAT => { + let (indexes, boundary_order) = self.build_page_index()?; + Index::FLOAT(NativeIndex { + indexes, + boundary_order, + }) + } + Type::DOUBLE => { + let (indexes, boundary_order) = self.build_page_index()?; + Index::DOUBLE(NativeIndex { + indexes, + boundary_order, + }) + } + Type::BYTE_ARRAY => { + let (indexes, boundary_order) = self.build_page_index()?; + Index::BYTE_ARRAY(NativeIndex { + indexes, + boundary_order, + }) + } + Type::FIXED_LEN_BYTE_ARRAY => { + let (indexes, boundary_order) = self.build_page_index()?; + Index::FIXED_LEN_BYTE_ARRAY(NativeIndex { + indexes, + boundary_order, + }) + } + }) + } + + fn build_page_index<T>(self) -> Result<(Vec<PageIndex<T>>, BoundaryOrder)> + where + T: ParquetValueType, + { + let len = self.min_values.len(); + + let null_counts = self + .null_counts + .iter() + .map(|x| Some(*x)) + .collect::<Vec<_>>(); + + // histograms are a 1D array encoding a 2D num_pages X num_levels matrix. + let to_page_histograms = |opt_hist: Option<Vec<i64>>| { + if let Some(hist) = opt_hist { + // TODO: should we assert (hist.len() % len) == 0?
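+ // Worked example: with len = 2 pages and a flattened histogram of + // [3, 1, 0, 4], num_levels is 2, so page 0 takes the slice [3, 1] + // and page 1 takes [0, 4].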
+ let num_levels = hist.len() / len; + let mut res = Vec::with_capacity(len); + for i in 0..len { + let page_idx = i * num_levels; + let page_hist = hist[page_idx..page_idx + num_levels].to_vec(); + res.push(Some(LevelHistogram::from(page_hist))); + } + res + } else { + vec![None; len] + } + }; + + let rep_hists: Vec<Option<LevelHistogram>> = + to_page_histograms(self.repetition_level_histograms); + let def_hists: Vec<Option<LevelHistogram>> = + to_page_histograms(self.definition_level_histograms); + + let indexes = self + .min_values + .iter() + .zip(self.max_values.iter()) + .zip(self.null_pages.into_iter()) + .zip(null_counts.into_iter()) + .zip(rep_hists.into_iter()) + .zip(def_hists.into_iter()) + .map( + |( + ((((min, max), is_null), null_count), repetition_level_histogram), + definition_level_histogram, + )| { + let (min, max) = if is_null { + (None, None) + } else { + ( + Some(T::try_from_le_slice(min)?), + Some(T::try_from_le_slice(max)?), + ) + }; + Ok(PageIndex { + min, + max, + null_count, + repetition_level_histogram, + definition_level_histogram, + }) + }, + ) + .collect::<Result<Vec<_>, ParquetError>>()?; + + let boundary_order = self.boundary_order; + Ok((indexes, boundary_order)) + } } impl From<ColumnChunkMetaData> for ColumnChunkMetaDataBuilder { @@ -1686,15 +1900,36 @@ impl OffsetIndexBuilder { } /// Build and get the thrift metadata of offset index - pub fn build_to_thrift(self) -> OffsetIndex { + pub fn build_to_thrift(self) -> crate::format::OffsetIndex { let locations = self .offset_array .iter() .zip(self.compressed_page_size_array.iter()) .zip(self.first_row_index_array.iter()) - .map(|((offset, size), row_index)| PageLocation::new(*offset, *size, *row_index)) + .map(|((offset, size), row_index)| { + crate::format::PageLocation::new(*offset, *size, *row_index) + }) .collect::<Vec<_>>(); - OffsetIndex::new(locations, self.unencoded_byte_array_data_bytes_array) + crate::format::OffsetIndex::new(locations, self.unencoded_byte_array_data_bytes_array) + } + + /// Build and get the offset index metadata + pub fn build(self) -> OffsetIndexMetaData { + let locations = self + .offset_array + .iter() + .zip(self.compressed_page_size_array.iter()) + .zip(self.first_row_index_array.iter()) + .map(|((offset, size), row_index)| PageLocation { + offset: *offset, + compressed_page_size: *size, + first_row_index: *row_index, + }) + .collect::<Vec<_>>(); + OffsetIndexMetaData { + page_locations: locations, + unencoded_byte_array_data_bytes: self.unencoded_byte_array_data_bytes_array, + } } } @@ -1974,7 +2209,7 @@ mod tests { assert_eq!(parquet_meta.memory_size(), base_expected_size); - let mut column_index = ColumnIndexBuilder::new(); + let mut column_index = ColumnIndexBuilder::new(Type::BOOLEAN); column_index.append(false, vec![1u8], vec![2u8, 3u8], 4); let column_index = column_index.build_to_thrift(); let native_index = NativeIndex::<bool>::try_new(column_index).unwrap(); diff --git a/parquet/src/file/metadata/reader.rs b/parquet/src/file/metadata/reader.rs index 356713837530..53ae01221976 100644 --- a/parquet/src/file/metadata/reader.rs +++ b/parquet/src/file/metadata/reader.rs @@ -17,12 +17,12 @@ use std::{io::Read, ops::Range, sync::Arc}; -use crate::basic::ColumnOrder; #[cfg(feature = "encryption")] use crate::encryption::{ decrypt::{FileDecryptionProperties, FileDecryptor}, modules::create_footer_aad, }; +use crate::{basic::ColumnOrder, file::metadata::KeyValue}; use bytes::Bytes; use crate::errors::{ParquetError, Result}; @@ -31,7 +31,6 @@ use crate::file::page_index::index::Index; use crate::file::page_index::index_reader::{acc_range, decode_column_index,
decode_offset_index}; use crate::file::reader::ChunkReader; use crate::file::{FOOTER_SIZE, PARQUET_MAGIC, PARQUET_MAGIC_ENCR_FOOTER}; -use crate::format::{ColumnOrder as TColumnOrder, FileMetaData as TFileMetaData}; #[cfg(feature = "encryption")] use crate::format::{EncryptionAlgorithm, FileCryptoMetaData as TFileCryptoMetaData}; use crate::schema::types; @@ -947,7 +946,7 @@ impl ParquetMetaDataReader { } } - let t_file_metadata: TFileMetaData = TFileMetaData::read_from_in_protocol(&mut prot) + let t_file_metadata = crate::format::FileMetaData::read_from_in_protocol(&mut prot) .map_err(|e| general_err!("Could not parse metadata: {}", e))?; let schema = types::from_thrift(&t_file_metadata.schema)?; let schema_descr = Arc::new(SchemaDescriptor::new(schema)); @@ -980,11 +979,17 @@ impl ParquetMetaDataReader { let column_orders = Self::parse_column_orders(t_file_metadata.column_orders, &schema_descr)?; + let key_value_metadata = t_file_metadata.key_value_metadata.map(|vkv| { + vkv.into_iter() + .map(|kv| KeyValue::new(kv.key, kv.value)) + .collect::<Vec<KeyValue>>() + }); + let file_metadata = FileMetaData::new( t_file_metadata.version, t_file_metadata.num_rows, t_file_metadata.created_by, - t_file_metadata.key_value_metadata, + key_value_metadata, schema_descr, column_orders, ); @@ -1005,7 +1010,7 @@ pub fn decode_metadata(buf: &[u8]) -> Result<ParquetMetaData> { let mut prot = TCompactSliceInputProtocol::new(buf); - let t_file_metadata: TFileMetaData = TFileMetaData::read_from_in_protocol(&mut prot) + let t_file_metadata = crate::format::FileMetaData::read_from_in_protocol(&mut prot) .map_err(|e| general_err!("Could not parse metadata: {}", e))?; let schema = types::from_thrift(&t_file_metadata.schema)?; let schema_descr = Arc::new(SchemaDescriptor::new(schema)); @@ -1017,11 +1022,17 @@ let column_orders = Self::parse_column_orders(t_file_metadata.column_orders, &schema_descr)?; + let key_value_metadata = t_file_metadata.key_value_metadata.map(|vkv| { + vkv.into_iter() + .map(|kv| KeyValue::new(kv.key, kv.value)) + .collect::<Vec<KeyValue>>() + }); + let file_metadata = FileMetaData::new( t_file_metadata.version, t_file_metadata.num_rows, t_file_metadata.created_by, - t_file_metadata.key_value_metadata, + key_value_metadata, schema_descr, column_orders, ); @@ -1032,7 +1043,7 @@ /// Parses column orders from Thrift definition. /// If no column orders are defined, returns `None`.
fn parse_column_orders( - t_column_orders: Option<Vec<TColumnOrder>>, + t_column_orders: Option<Vec<crate::format::ColumnOrder>>, schema_descr: &SchemaDescriptor, ) -> Result<Option<Vec<ColumnOrder>>> { match t_column_orders { Some(orders) => { if orders.len() != schema_descr.num_columns() { return Err(general_err!("Column order length mismatch")); }; let mut res = Vec::new(); for (i, column) in schema_descr.columns().iter().enumerate() { match orders[i] { - TColumnOrder::TYPEORDER(_) => { + crate::format::ColumnOrder::TYPEORDER(_) => { let sort_order = ColumnOrder::get_sort_order( column.logical_type(), column.converted_type(), @@ -1099,7 +1110,6 @@ mod tests { use crate::basic::SortOrder; use crate::basic::Type; use crate::file::reader::Length; -use crate::format::TypeDefinedOrder; use crate::schema::types::Type as SchemaType; use crate::util::test_common::file_util::get_test_file; @@ -1153,8 +1163,8 @@ let schema_descr = SchemaDescriptor::new(Arc::new(schema)); let t_column_orders = Some(vec![ - TColumnOrder::TYPEORDER(TypeDefinedOrder::new()), - TColumnOrder::TYPEORDER(TypeDefinedOrder::new()), + crate::format::ColumnOrder::TYPEORDER(Default::default()), + crate::format::ColumnOrder::TYPEORDER(Default::default()), ]); assert_eq!( @@ -1177,7 +1187,9 @@ let schema = SchemaType::group_type_builder("schema").build().unwrap(); let schema_descr = SchemaDescriptor::new(Arc::new(schema)); - let t_column_orders = Some(vec![TColumnOrder::TYPEORDER(TypeDefinedOrder::new())]); + let t_column_orders = Some(vec![crate::format::ColumnOrder::TYPEORDER( + Default::default(), + )]); let res = ParquetMetaDataReader::parse_column_orders(t_column_orders, &schema_descr); assert!(res.is_err()); diff --git a/parquet/src/file/metadata/writer.rs b/parquet/src/file/metadata/writer.rs index 5bb59b6b2faf..acae20ec3cef 100644 --- a/parquet/src/file/metadata/writer.rs +++ b/parquet/src/file/metadata/writer.rs @@ -31,7 +31,6 @@ use crate::file::writer::{get_file_magic, TrackedWrite}; use crate::format::EncryptionAlgorithm; #[cfg(feature = "encryption")] use crate::format::{AesGcmV1, ColumnCryptoMetaData}; -use crate::format::{ColumnChunk, ColumnIndex, FileMetaData, OffsetIndex, RowGroup}; use crate::schema::types; use crate::schema::types::{SchemaDescPtr, SchemaDescriptor, TypePtr}; use crate::thrift::TSerializable; @@ -46,9 +45,9 @@ pub(crate) struct ThriftMetadataWriter<'a, W: Write> { buf: &'a mut TrackedWrite<W>, schema: &'a TypePtr, schema_descr: &'a SchemaDescPtr, - row_groups: Vec<RowGroup>, - column_indexes: Option<&'a [Vec<Option<ColumnIndex>>]>, - offset_indexes: Option<&'a [Vec<Option<OffsetIndex>>]>, + row_groups: Vec<crate::format::RowGroup>, + column_indexes: Option<&'a [Vec<Option<crate::format::ColumnIndex>>]>, + offset_indexes: Option<&'a [Vec<Option<crate::format::OffsetIndex>>]>, key_value_metadata: Option<Vec<KeyValue>>, created_by: Option<String>, object_writer: MetadataObjectWriter, @@ -61,7 +60,10 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> { /// Note: also updates the `ColumnChunk::offset_index_offset` and /// `ColumnChunk::offset_index_length` to reflect the position and length /// of the serialized offset indexes. - fn write_offset_indexes(&mut self, offset_indexes: &[Vec<Option<OffsetIndex>>]) -> Result<()> { + fn write_offset_indexes( + &mut self, + offset_indexes: &[Vec<Option<crate::format::OffsetIndex>>], + ) -> Result<()> { // iter row group // iter each column // write offset index to the file @@ -91,7 +93,10 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> { /// Note: also updates the `ColumnChunk::column_index_offset` and /// `ColumnChunk::column_index_length` to reflect the position and length /// of the serialized column indexes.
- fn write_column_indexes(&mut self, column_indexes: &[Vec<Option<ColumnIndex>>]) -> Result<()> { + fn write_column_indexes( + &mut self, + column_indexes: &[Vec<Option<crate::format::ColumnIndex>>], + ) -> Result<()> { // iter row group // iter each column // write column index to the file @@ -146,10 +151,15 @@ let (encryption_algorithm, footer_signing_key_metadata) = self.object_writer.get_plaintext_footer_crypto_metadata(); - let mut file_metadata = FileMetaData { + let key_value_metadata = self.key_value_metadata.map(|vkv| { + vkv.into_iter() + .map(|kv| crate::format::KeyValue::new(kv.key, kv.value)) + .collect::<Vec<crate::format::KeyValue>>() + }); + let mut file_metadata = crate::format::FileMetaData { num_rows, row_groups, - key_value_metadata: self.key_value_metadata.clone(), + key_value_metadata, version: self.writer_version, schema: types::to_thrift(self.schema.as_ref())?, created_by: self.created_by.clone(), @@ -185,7 +195,7 @@ buf: &'a mut TrackedWrite<W>, schema: &'a TypePtr, schema_descr: &'a SchemaDescPtr, - row_groups: Vec<RowGroup>, + row_groups: Vec<crate::format::RowGroup>, created_by: Option<String>, writer_version: i32, ) -> Self { @@ -203,12 +213,18 @@ } } - pub fn with_column_indexes(mut self, column_indexes: &'a [Vec<Option<ColumnIndex>>]) -> Self { + pub fn with_column_indexes( + mut self, + column_indexes: &'a [Vec<Option<crate::format::ColumnIndex>>], + ) -> Self { self.column_indexes = Some(column_indexes); self } - pub fn with_offset_indexes(mut self, offset_indexes: &'a [Vec<Option<OffsetIndex>>]) -> Self { + pub fn with_offset_indexes( + mut self, + offset_indexes: &'a [Vec<Option<crate::format::OffsetIndex>>], + ) -> Self { self.offset_indexes = Some(offset_indexes); self } @@ -257,6 +273,8 @@ /// /// [`FileMetaData`]: crate::format::FileMetaData /// [`ColumnChunkMetaData`]: crate::file::metadata::ColumnChunkMetaData +/// [`ColumnIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md +/// [`OffsetIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md /// /// ```text /// ┌──────────────────────┐ @@ -365,7 +383,7 @@ impl<'a, W: Write> ParquetMetaDataWriter<'a, W> { Ok(()) } - fn convert_column_indexes(&self) -> Vec<Vec<Option<ColumnIndex>>> { + fn convert_column_indexes(&self) -> Vec<Vec<Option<crate::format::ColumnIndex>>> { if let Some(row_group_column_indexes) = self.metadata.column_index() { (0..self.metadata.row_groups().len()) .map(|rg_idx| { @@ -398,7 +416,7 @@ } } - fn convert_offset_index(&self) -> Vec<Vec<Option<OffsetIndex>>> { + fn convert_offset_index(&self) -> Vec<Vec<Option<crate::format::OffsetIndex>>> { if let Some(row_group_offset_indexes) = self.metadata.offset_index() { (0..self.metadata.row_groups().len()) .map(|rg_idx| { @@ -439,15 +457,19 @@ impl MetadataObjectWriter { #[cfg(not(feature = "encryption"))] impl MetadataObjectWriter { /// Write [`FileMetaData`] in Thrift format - fn write_file_metadata(&self, file_metadata: &FileMetaData, sink: impl Write) -> Result<()> { + fn write_file_metadata( + &self, + file_metadata: &crate::format::FileMetaData, + sink: impl Write, + ) -> Result<()> { Self::write_object(file_metadata, sink) } /// Write a column [`OffsetIndex`] in Thrift format fn write_offset_index( &self, - offset_index: &OffsetIndex, - _column_chunk: &ColumnChunk, + offset_index: &crate::format::OffsetIndex, + _column_chunk: &crate::format::ColumnChunk, _row_group_idx: usize, _column_idx: usize, sink: impl Write, @@ -458,8 +480,8 @@ /// Write a column [`ColumnIndex`] in Thrift format fn write_column_index( &self, - column_index: &ColumnIndex, - _column_chunk: &ColumnChunk, +
column_index: &crate::format::ColumnIndex, + _column_chunk: &crate::format::ColumnChunk, _row_group_idx: usize, _column_idx: usize, sink: impl Write, @@ -470,8 +492,11 @@ /// No-op implementation of row-group metadata encryption fn apply_row_group_encryption( &self, - row_groups: Vec<RowGroup>, - ) -> Result<(Vec<RowGroup>, Option<Vec<RowGroup>>)> { + row_groups: Vec<crate::format::RowGroup>, + ) -> Result<( + Vec<crate::format::RowGroup>, + Option<Vec<crate::format::RowGroup>>, + )> { Ok((row_groups, None)) } @@ -497,9 +522,11 @@ } /// Write [`FileMetaData`] in Thrift format, possibly encrypting it if required + /// + /// [`FileMetaData`]: crate::format::FileMetaData fn write_file_metadata( &self, - file_metadata: &FileMetaData, + file_metadata: &crate::format::FileMetaData, mut sink: impl Write, ) -> Result<()> { match self.file_encryptor.as_ref() { @@ -524,10 +551,12 @@ } /// Write a column [`OffsetIndex`] in Thrift format, possibly encrypting it if required + /// + /// [`OffsetIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md fn write_offset_index( &self, - offset_index: &OffsetIndex, - column_chunk: &ColumnChunk, + offset_index: &crate::format::OffsetIndex, + column_chunk: &crate::format::ColumnChunk, row_group_idx: usize, column_idx: usize, sink: impl Write, @@ -547,10 +576,12 @@ } /// Write a column [`ColumnIndex`] in Thrift format, possibly encrypting it if required + /// + /// [`ColumnIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md fn write_column_index( &self, - column_index: &ColumnIndex, - column_chunk: &ColumnChunk, + column_index: &crate::format::ColumnIndex, + column_chunk: &crate::format::ColumnChunk, row_group_idx: usize, column_idx: usize, sink: impl Write, @@ -574,8 +605,11 @@ /// and possibly unencrypted metadata to be returned to clients if data was encrypted. fn apply_row_group_encryption( &self, - row_groups: Vec<RowGroup>, - ) -> Result<(Vec<RowGroup>, Option<Vec<RowGroup>>)> { + row_groups: Vec<crate::format::RowGroup>, + ) -> Result<( + Vec<crate::format::RowGroup>, + Option<Vec<crate::format::RowGroup>>, + )> { match &self.file_encryptor { Some(file_encryptor) => { let unencrypted_row_groups = row_groups.clone(); @@ -599,7 +633,7 @@ object: &impl TSerializable, mut sink: impl Write, file_encryptor: &FileEncryptor, - column_metadata: &ColumnChunk, + column_metadata: &crate::format::ColumnChunk, module_type: ModuleType, row_group_index: usize, column_index: usize, @@ -682,14 +716,14 @@ } fn encrypt_row_groups( - row_groups: Vec<RowGroup>, + row_groups: Vec<crate::format::RowGroup>, file_encryptor: &Arc<FileEncryptor>, - ) -> Result<Vec<RowGroup>> { + ) -> Result<Vec<crate::format::RowGroup>> { row_groups .into_iter() .enumerate() .map(|(rg_idx, mut rg)| { - let cols: Result<Vec<ColumnChunk>> = rg + let cols: Result<Vec<crate::format::ColumnChunk>> = rg .columns .into_iter() .enumerate() @@ -705,11 +739,11 @@ /// Apply column encryption to column chunk metadata fn encrypt_column_chunk( - mut column_chunk: ColumnChunk, + mut column_chunk: crate::format::ColumnChunk, file_encryptor: &Arc<FileEncryptor>, row_group_index: usize, column_index: usize, - ) -> Result<ColumnChunk> { + ) -> Result<crate::format::ColumnChunk> { // Column crypto metadata should have already been set when the column was created. // Here we apply the encryption by encrypting the column metadata if required.
match column_chunk.crypto_metadata.as_ref() { diff --git a/parquet/src/file/page_encoding_stats.rs b/parquet/src/file/page_encoding_stats.rs index edb6a8fa9d4c..67ca2a3e4c71 100644 --- a/parquet/src/file/page_encoding_stats.rs +++ b/parquet/src/file/page_encoding_stats.rs @@ -19,9 +19,6 @@ use crate::basic::{Encoding, PageType}; use crate::errors::Result; -use crate::format::{ - Encoding as TEncoding, PageEncodingStats as TPageEncodingStats, PageType as TPageType, -}; /// PageEncodingStats for a column chunk and data page. #[derive(Clone, Debug, PartialEq, Eq)] @@ -35,7 +32,9 @@ pub struct PageEncodingStats { } /// Converts Thrift definition into `PageEncodingStats`. -pub fn try_from_thrift(thrift_encoding_stats: &TPageEncodingStats) -> Result { +pub fn try_from_thrift( + thrift_encoding_stats: &crate::format::PageEncodingStats, +) -> Result { let page_type = PageType::try_from(thrift_encoding_stats.page_type)?; let encoding = Encoding::try_from(thrift_encoding_stats.encoding)?; let count = thrift_encoding_stats.count; @@ -48,12 +47,12 @@ pub fn try_from_thrift(thrift_encoding_stats: &TPageEncodingStats) -> Result TPageEncodingStats { - let page_type = TPageType::from(encoding_stats.page_type); - let encoding = TEncoding::from(encoding_stats.encoding); +pub fn to_thrift(encoding_stats: &PageEncodingStats) -> crate::format::PageEncodingStats { + let page_type = crate::format::PageType::from(encoding_stats.page_type); + let encoding = crate::format::Encoding::from(encoding_stats.encoding); let count = encoding_stats.count; - TPageEncodingStats { + crate::format::PageEncodingStats { page_type, encoding, count, diff --git a/parquet/src/file/page_index/index.rs b/parquet/src/file/page_index/index.rs index a66509e14c7a..2c9aa009080e 100644 --- a/parquet/src/file/page_index/index.rs +++ b/parquet/src/file/page_index/index.rs @@ -16,13 +16,14 @@ // under the License. //! [`Index`] structures holding decoded [`ColumnIndex`] information +//! +//! 
[`ColumnIndex`]: crate::format::ColumnIndex -use crate::basic::Type; +use crate::basic::{BoundaryOrder, Type}; use crate::data_type::private::ParquetValueType; use crate::data_type::{AsBytes, ByteArray, FixedLenByteArray, Int96}; use crate::errors::ParquetError; use crate::file::metadata::LevelHistogram; -use crate::format::{BoundaryOrder, ColumnIndex}; use std::fmt::Debug; /// Typed statistics for one data page @@ -78,6 +79,11 @@ impl PageIndex { pub fn definition_level_histogram(&self) -> Option<&LevelHistogram> { self.definition_level_histogram.as_ref() } + + /// Returns whether this is an all null page + pub fn is_null_page(&self) -> bool { + self.min.is_none() + } } impl PageIndex @@ -132,7 +138,7 @@ impl Index { pub fn is_sorted(&self) -> bool { // 0:UNORDERED, 1:ASCENDING ,2:DESCENDING, if let Some(order) = self.get_boundary_order() { - order.0 > (BoundaryOrder::UNORDERED.0) + order != BoundaryOrder::UNORDERED } else { false } @@ -170,6 +176,7 @@ impl Index { /// /// [PageIndex documentation]: https://github.com/apache/parquet-format/blob/master/PageIndex.md /// [`Statistics`]: crate::file::statistics::Statistics +/// [`ColumnIndex`]: crate::format::ColumnIndex #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct NativeIndex { /// The actual column indexes, one item per page @@ -186,7 +193,7 @@ impl NativeIndex { pub const PHYSICAL_TYPE: Type = T::PHYSICAL_TYPE; /// Creates a new [`NativeIndex`] - pub(crate) fn try_new(index: ColumnIndex) -> Result { + pub(crate) fn try_new(index: crate::format::ColumnIndex) -> Result { let len = index.min_values.len(); let null_counts = index @@ -248,13 +255,14 @@ impl NativeIndex { ) .collect::, ParquetError>>()?; + let boundary_order = index.boundary_order.try_into()?; Ok(Self { indexes, - boundary_order: index.boundary_order, + boundary_order, }) } - pub(crate) fn to_thrift(&self) -> ColumnIndex { + pub(crate) fn to_thrift(&self) -> crate::format::ColumnIndex { let min_values = self .indexes .iter() @@ -288,11 +296,11 @@ impl NativeIndex { .collect::>>() .map(|hists| hists.concat()); - ColumnIndex::new( + crate::format::ColumnIndex::new( self.indexes.iter().map(|x| x.min().is_none()).collect(), min_values, max_values, - self.boundary_order, + self.boundary_order.into(), null_counts, repetition_level_histograms, definition_level_histograms, @@ -350,7 +358,7 @@ mod tests { #[test] fn test_invalid_column_index() { - let column_index = ColumnIndex { + let column_index = crate::format::ColumnIndex { null_pages: vec![true, false], min_values: vec![ vec![], @@ -363,7 +371,7 @@ mod tests { null_counts: None, repetition_level_histograms: None, definition_level_histograms: None, - boundary_order: BoundaryOrder::UNORDERED, + boundary_order: crate::format::BoundaryOrder::UNORDERED, }; let err = NativeIndex::::try_new(column_index).unwrap_err(); diff --git a/parquet/src/file/page_index/index_reader.rs b/parquet/src/file/page_index/index_reader.rs index d0537711dc20..d4d405d68ff2 100644 --- a/parquet/src/file/page_index/index_reader.rs +++ b/parquet/src/file/page_index/index_reader.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! Support for reading [`Index`] and [`OffsetIndex`] from parquet metadata. +//! Support for reading [`Index`] and [`OffsetIndexMetaData`] from parquet metadata. 
use crate::basic::Type; use crate::data_type::Int96; @@ -24,7 +24,6 @@ use crate::file::metadata::ColumnChunkMetaData; use crate::file::page_index::index::{Index, NativeIndex}; use crate::file::page_index::offset_index::OffsetIndexMetaData; use crate::file::reader::ChunkReader; -use crate::format::{ColumnIndex, OffsetIndex}; use crate::thrift::{TCompactSliceInputProtocol, TSerializable}; use std::ops::Range; @@ -48,6 +47,7 @@ pub(crate) fn acc_range(a: Option>, b: Option>) -> Option< /// See [Page Index Documentation] for more details. /// /// [Page Index Documentation]: https://github.com/apache/parquet-format/blob/master/PageIndex.md +/// [`ColumnIndex`]: crate::format::ColumnIndex #[deprecated( since = "55.2.0", note = "Use ParquetMetaDataReader instead; will be removed in 58.0.0" @@ -93,6 +93,7 @@ pub fn read_columns_indexes( /// See [Page Index Documentation] for more details. /// /// [Page Index Documentation]: https://github.com/apache/parquet-format/blob/master/PageIndex.md +/// [`OffsetIndex`]: crate::format::OffsetIndex #[deprecated( since = "55.2.0", note = "Use ParquetMetaDataReader instead; will be removed in 58.0.0" @@ -129,14 +130,14 @@ pub fn read_offset_indexes( pub(crate) fn decode_offset_index(data: &[u8]) -> Result { let mut prot = TCompactSliceInputProtocol::new(data); - let offset = OffsetIndex::read_from_in_protocol(&mut prot)?; + let offset = crate::format::OffsetIndex::read_from_in_protocol(&mut prot)?; OffsetIndexMetaData::try_new(offset) } pub(crate) fn decode_column_index(data: &[u8], column_type: Type) -> Result { let mut prot = TCompactSliceInputProtocol::new(data); - let index = ColumnIndex::read_from_in_protocol(&mut prot)?; + let index = crate::format::ColumnIndex::read_from_in_protocol(&mut prot)?; let index = match column_type { Type::BOOLEAN => Index::BOOLEAN(NativeIndex::::try_new(index)?), diff --git a/parquet/src/file/page_index/offset_index.rs b/parquet/src/file/page_index/offset_index.rs index d48d1b6c083d..5614b1750a0e 100644 --- a/parquet/src/file/page_index/offset_index.rs +++ b/parquet/src/file/page_index/offset_index.rs @@ -16,12 +16,49 @@ // under the License. //! [`OffsetIndexMetaData`] structure holding decoded [`OffsetIndex`] information +//! +//! [`OffsetIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md use crate::errors::ParquetError; -use crate::format::{OffsetIndex, PageLocation}; + +/// Page location information for [`OffsetIndexMetaData`] +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct PageLocation { + /// Offset of the page in the file * + pub offset: i64, + /// Size of the page, including header. Sum of compressed_page_size and header + /// length + pub compressed_page_size: i32, + /// Index within the RowGroup of the first row of the page. When an + /// OffsetIndex is present, pages must begin on row boundaries + /// (repetition_level = 0). + pub first_row_index: i64, +} + +impl From<&crate::format::PageLocation> for PageLocation { + fn from(value: &crate::format::PageLocation) -> Self { + Self { + offset: value.offset, + compressed_page_size: value.compressed_page_size, + first_row_index: value.first_row_index, + } + } +} + +impl From<&PageLocation> for crate::format::PageLocation { + fn from(value: &PageLocation) -> Self { + Self { + offset: value.offset, + compressed_page_size: value.compressed_page_size, + first_row_index: value.first_row_index, + } + } +} /// [`OffsetIndex`] information for a column chunk. Contains offsets and sizes for each page /// in the chunk. 
Optionally stores fully decoded page sizes for BYTE_ARRAY columns. +/// +/// [`OffsetIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md #[derive(Debug, Clone, PartialEq)] pub struct OffsetIndexMetaData { /// Vector of [`PageLocation`] objects, one per page in the chunk. @@ -33,9 +70,12 @@ pub struct OffsetIndexMetaData { impl OffsetIndexMetaData { /// Creates a new [`OffsetIndexMetaData`] from an [`OffsetIndex`]. - pub(crate) fn try_new(index: OffsetIndex) -> Result { + /// + /// [`OffsetIndex`]: crate::format::OffsetIndex + pub(crate) fn try_new(index: crate::format::OffsetIndex) -> Result { + let page_locations = index.page_locations.iter().map(|loc| loc.into()).collect(); Ok(Self { - page_locations: index.page_locations, + page_locations, unencoded_byte_array_data_bytes: index.unencoded_byte_array_data_bytes, }) } @@ -53,9 +93,10 @@ impl OffsetIndexMetaData { // TODO: remove annotation after merge #[allow(dead_code)] - pub(crate) fn to_thrift(&self) -> OffsetIndex { - OffsetIndex::new( - self.page_locations.clone(), + pub(crate) fn to_thrift(&self) -> crate::format::OffsetIndex { + let page_locations = self.page_locations.iter().map(|loc| loc.into()).collect(); + crate::format::OffsetIndex::new( + page_locations, self.unencoded_byte_array_data_bytes.clone(), ) } diff --git a/parquet/src/file/properties.rs b/parquet/src/file/properties.rs index 26177b69a577..a4919435298c 100644 --- a/parquet/src/file/properties.rs +++ b/parquet/src/file/properties.rs @@ -20,8 +20,7 @@ use crate::basic::{Compression, Encoding}; use crate::compression::{CodecOptions, CodecOptionsBuilder}; #[cfg(feature = "encryption")] use crate::encryption::encrypt::FileEncryptionProperties; -use crate::file::metadata::KeyValue; -use crate::format::SortingColumn; +use crate::file::metadata::{KeyValue, SortingColumn}; use crate::schema::types::ColumnPath; use std::str::FromStr; use std::{collections::HashMap, sync::Arc}; diff --git a/parquet/src/file/serialized_reader.rs b/parquet/src/file/serialized_reader.rs index 2edb38deb3e0..f12048a4ee02 100644 --- a/parquet/src/file/serialized_reader.rs +++ b/parquet/src/file/serialized_reader.rs @@ -25,14 +25,14 @@ use crate::compression::{create_codec, Codec}; #[cfg(feature = "encryption")] use crate::encryption::decrypt::{read_and_decrypt, CryptoContext}; use crate::errors::{ParquetError, Result}; -use crate::file::page_index::offset_index::OffsetIndexMetaData; +use crate::file::page_index::offset_index::{OffsetIndexMetaData, PageLocation}; use crate::file::{ metadata::*, properties::{ReaderProperties, ReaderPropertiesPtr}, reader::*, statistics, }; -use crate::format::{PageHeader, PageLocation, PageType}; +use crate::format::{PageHeader, PageType}; use crate::record::reader::RowIter; use crate::record::Row; use crate::schema::types::Type as SchemaType; @@ -1102,9 +1102,8 @@ mod tests { use bytes::Buf; use crate::file::properties::{EnabledStatistics, WriterProperties}; - use crate::format::BoundaryOrder; - use crate::basic::{self, ColumnOrder, SortOrder}; + use crate::basic::{self, BoundaryOrder, ColumnOrder, SortOrder}; use crate::column::reader::ColumnReader; use crate::data_type::private::ParquetValueType; use crate::data_type::{AsBytes, FixedLenByteArrayType, Int32Type}; diff --git a/parquet/src/file/statistics.rs b/parquet/src/file/statistics.rs index 02729a5016bb..d4501830ac40 100644 --- a/parquet/src/file/statistics.rs +++ b/parquet/src/file/statistics.rs @@ -41,8 +41,6 @@ use std::fmt; -use crate::format::Statistics as TStatistics; - use 
crate::basic::Type; use crate::data_type::private::ParquetValueType; use crate::data_type::*; @@ -122,7 +120,7 @@ macro_rules! statistics_enum_func { /// Converts Thrift definition into `Statistics`. pub fn from_thrift( physical_type: Type, - thrift_stats: Option, + thrift_stats: Option, ) -> Result> { Ok(match thrift_stats { Some(stats) => { @@ -269,7 +267,7 @@ pub fn from_thrift( } /// Convert Statistics into Thrift definition. -pub fn to_thrift(stats: Option<&Statistics>) -> Option { +pub fn to_thrift(stats: Option<&Statistics>) -> Option { let stats = stats?; // record null count if it can fit in i64 @@ -282,7 +280,7 @@ pub fn to_thrift(stats: Option<&Statistics>) -> Option { .distinct_count_opt() .and_then(|value| i64::try_from(value).ok()); - let mut thrift_stats = TStatistics { + let mut thrift_stats = crate::format::Statistics { max: None, min: None, null_count, @@ -702,7 +700,7 @@ mod tests { #[test] #[should_panic(expected = "General(\"Statistics null count is negative -10\")")] fn test_statistics_negative_null_count() { - let thrift_stats = TStatistics { + let thrift_stats = crate::format::Statistics { max: None, min: None, null_count: Some(-10), @@ -1017,7 +1015,7 @@ mod tests { #[test] fn test_count_decoding_null_invalid() { - let tstatistics = TStatistics { + let tstatistics = crate::format::Statistics { null_count: Some(-42), ..Default::default() }; diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs index 31a3344db66c..b985c31ec8b8 100644 --- a/parquet/src/file/writer.rs +++ b/parquet/src/file/writer.rs @@ -19,8 +19,8 @@ //! using row group writers and column writers respectively. use crate::bloom_filter::Sbbf; -use crate::format as parquet; -use crate::format::{ColumnIndex, OffsetIndex}; +use crate::file::page_index::index::Index; +use crate::file::page_index::offset_index::OffsetIndexMetaData; use crate::thrift::TSerializable; use std::fmt::Debug; use std::io::{BufWriter, IoSlice, Read}; @@ -128,8 +128,8 @@ pub type OnCloseRowGroup<'a, W> = Box< &'a mut TrackedWrite, RowGroupMetaData, Vec>, - Vec>, - Vec>, + Vec>, + Vec>, ) -> Result<()> + 'a + Send, @@ -154,8 +154,8 @@ pub struct SerializedFileWriter { props: WriterPropertiesPtr, row_groups: Vec, bloom_filters: Vec>>, - column_indexes: Vec>>, - offset_indexes: Vec>>, + column_indexes: Vec>>, + offset_indexes: Vec>>, row_group_index: usize, // kv_metadatas will be appended to `props` when `write_metadata` kv_metadatas: Vec, @@ -290,7 +290,7 @@ impl SerializedFileWriter { /// Unlike [`Self::close`] this does not consume self /// /// Attempting to write after calling finish will result in an error - pub fn finish(&mut self) -> Result { + pub fn finish(&mut self) -> Result { self.assert_previous_writer_closed()?; let metadata = self.write_metadata()?; self.buf.flush()?; @@ -298,7 +298,7 @@ impl SerializedFileWriter { } /// Closes and finalises file writer, returning the file metadata. - pub fn close(mut self) -> Result { + pub fn close(mut self) -> Result { self.finish() } @@ -319,7 +319,7 @@ impl SerializedFileWriter { } /// Assembles and writes metadata at the end of the file. 
- fn write_metadata(&mut self) -> Result { + fn write_metadata(&mut self) -> Result { self.finished = true; // write out any remaining bloom filters after all row groups @@ -339,6 +339,9 @@ impl SerializedFileWriter { .map(|v| v.to_thrift()) .collect::>(); + let column_indexes = self.convert_column_indexes(); + let offset_indexes = self.convert_offset_index(); + let mut encoder = ThriftMetadataWriter::new( &mut self.buf, &self.schema, @@ -356,11 +359,46 @@ impl SerializedFileWriter { if let Some(key_value_metadata) = key_value_metadata { encoder = encoder.with_key_value_metadata(key_value_metadata) } - encoder = encoder.with_column_indexes(&self.column_indexes); - encoder = encoder.with_offset_indexes(&self.offset_indexes); + + encoder = encoder.with_column_indexes(&column_indexes); + encoder = encoder.with_offset_indexes(&offset_indexes); encoder.finish() } + fn convert_column_indexes(&self) -> Vec>> { + self.column_indexes + .iter() + .map(|cis| { + cis.iter() + .map(|ci| { + ci.as_ref().map(|column_index| match column_index { + Index::NONE => panic!("trying to serialize missing column index"), + Index::BOOLEAN(column_index) => column_index.to_thrift(), + Index::BYTE_ARRAY(column_index) => column_index.to_thrift(), + Index::DOUBLE(column_index) => column_index.to_thrift(), + Index::FIXED_LEN_BYTE_ARRAY(column_index) => column_index.to_thrift(), + Index::FLOAT(column_index) => column_index.to_thrift(), + Index::INT32(column_index) => column_index.to_thrift(), + Index::INT64(column_index) => column_index.to_thrift(), + Index::INT96(column_index) => column_index.to_thrift(), + }) + }) + .collect() + }) + .collect() + } + + fn convert_offset_index(&self) -> Vec>> { + self.offset_indexes + .iter() + .map(|ois| { + ois.iter() + .map(|oi| oi.as_ref().map(|offset_index| offset_index.to_thrift())) + .collect() + }) + .collect() + } + #[inline] fn assert_previous_writer_closed(&self) -> Result<()> { if self.finished { @@ -499,8 +537,8 @@ pub struct SerializedRowGroupWriter<'a, W: Write> { row_group_metadata: Option, column_chunks: Vec, bloom_filters: Vec>, - column_indexes: Vec>, - offset_indexes: Vec>, + column_indexes: Vec>, + offset_indexes: Vec>, row_group_index: i16, file_offset: i64, on_close: Option>, @@ -901,7 +939,7 @@ impl<'a, W: Write> SerializedPageWriter<'a, W> { /// Serializes page header into Thrift. /// Returns number of bytes that have been written into the sink. 
#[inline] - fn serialize_page_header(&mut self, header: parquet::PageHeader) -> Result { + fn serialize_page_header(&mut self, header: crate::format::PageHeader) -> Result { let start_pos = self.sink.bytes_written(); match self.page_encryptor_and_sink_mut() { Some((page_encryptor, sink)) => { @@ -1032,7 +1070,6 @@ mod tests { reader::{FileReader, SerializedFileReader, SerializedPageReader}, statistics::{from_thrift, to_thrift, Statistics}, }; - use crate::format::SortingColumn; use crate::record::{Row, RowAccessor}; use crate::schema::parser::parse_message_type; use crate::schema::types; diff --git a/parquet/src/schema/parser.rs b/parquet/src/schema/parser.rs index 0a67250476c7..700be8a15fd6 100644 --- a/parquet/src/schema/parser.rs +++ b/parquet/src/schema/parser.rs @@ -178,9 +178,9 @@ fn parse_timeunit( value .ok_or_else(|| general_err!(not_found_msg)) .and_then(|v| match v.to_uppercase().as_str() { - "MILLIS" => Ok(TimeUnit::MILLIS(Default::default())), - "MICROS" => Ok(TimeUnit::MICROS(Default::default())), - "NANOS" => Ok(TimeUnit::NANOS(Default::default())), + "MILLIS" => Ok(TimeUnit::MILLIS), + "MICROS" => Ok(TimeUnit::MICROS), + "NANOS" => Ok(TimeUnit::NANOS), _ => Err(general_err!(parse_fail_msg)), }) } @@ -1075,7 +1075,7 @@ mod tests { Arc::new( Type::primitive_type_builder("_6", PhysicalType::INT32) .with_logical_type(Some(LogicalType::Time { - unit: TimeUnit::MILLIS(Default::default()), + unit: TimeUnit::MILLIS, is_adjusted_to_u_t_c: false, })) .build() @@ -1084,7 +1084,7 @@ mod tests { Arc::new( Type::primitive_type_builder("_7", PhysicalType::INT64) .with_logical_type(Some(LogicalType::Time { - unit: TimeUnit::MICROS(Default::default()), + unit: TimeUnit::MICROS, is_adjusted_to_u_t_c: true, })) .build() @@ -1093,7 +1093,7 @@ mod tests { Arc::new( Type::primitive_type_builder("_8", PhysicalType::INT64) .with_logical_type(Some(LogicalType::Timestamp { - unit: TimeUnit::MILLIS(Default::default()), + unit: TimeUnit::MILLIS, is_adjusted_to_u_t_c: true, })) .build() @@ -1102,7 +1102,7 @@ mod tests { Arc::new( Type::primitive_type_builder("_9", PhysicalType::INT64) .with_logical_type(Some(LogicalType::Timestamp { - unit: TimeUnit::NANOS(Default::default()), + unit: TimeUnit::NANOS, is_adjusted_to_u_t_c: false, })) .build() diff --git a/parquet/src/schema/printer.rs b/parquet/src/schema/printer.rs index 5ef068da915b..4190f9717dcd 100644 --- a/parquet/src/schema/printer.rs +++ b/parquet/src/schema/printer.rs @@ -277,9 +277,9 @@ impl<'a> Printer<'a> { #[inline] fn print_timeunit(unit: &TimeUnit) -> &str { match unit { - TimeUnit::MILLIS(_) => "MILLIS", - TimeUnit::MICROS(_) => "MICROS", - TimeUnit::NANOS(_) => "NANOS", + TimeUnit::MILLIS => "MILLIS", + TimeUnit::MICROS => "MICROS", + TimeUnit::NANOS => "NANOS", } } @@ -645,7 +645,7 @@ mod tests { PhysicalType::INT64, Some(LogicalType::Timestamp { is_adjusted_to_u_t_c: true, - unit: TimeUnit::MILLIS(Default::default()), + unit: TimeUnit::MILLIS, }), ConvertedType::NONE, Repetition::REQUIRED, @@ -671,7 +671,7 @@ mod tests { None, PhysicalType::INT32, Some(LogicalType::Time { - unit: TimeUnit::MILLIS(Default::default()), + unit: TimeUnit::MILLIS, is_adjusted_to_u_t_c: false, }), ConvertedType::TIME_MILLIS, @@ -686,7 +686,7 @@ mod tests { Some(42), PhysicalType::INT32, Some(LogicalType::Time { - unit: TimeUnit::MILLIS(Default::default()), + unit: TimeUnit::MILLIS, is_adjusted_to_u_t_c: false, }), ConvertedType::TIME_MILLIS, diff --git a/parquet/src/schema/types.rs b/parquet/src/schema/types.rs index 68492e19f437..0b36fbb63b34 100644 --- 
a/parquet/src/schema/types.rs +++ b/parquet/src/schema/types.rs @@ -20,7 +20,6 @@ use std::{collections::HashMap, fmt, sync::Arc}; use crate::file::metadata::HeapSize; -use crate::format::SchemaElement; use crate::basic::{ ColumnOrder, ConvertedType, LogicalType, Repetition, SortOrder, TimeUnit, Type as PhysicalType, @@ -375,13 +374,13 @@ impl<'a> PrimitiveTypeBuilder<'a> { (LogicalType::Date, PhysicalType::INT32) => {} ( LogicalType::Time { - unit: TimeUnit::MILLIS(_), + unit: TimeUnit::MILLIS, .. }, PhysicalType::INT32, ) => {} (LogicalType::Time { unit, .. }, PhysicalType::INT64) => { - if *unit == TimeUnit::MILLIS(Default::default()) { + if *unit == TimeUnit::MILLIS { return Err(general_err!( "Cannot use millisecond unit on INT64 type for field '{}'", self.name @@ -1160,7 +1159,7 @@ fn build_tree<'a>( } /// Method to convert from Thrift. -pub fn from_thrift(elements: &[SchemaElement]) -> Result { +pub fn from_thrift(elements: &[crate::format::SchemaElement]) -> Result { let mut index = 0; let mut schema_nodes = Vec::new(); while index < elements.len() { @@ -1198,7 +1197,10 @@ fn check_logical_type(logical_type: &Option) -> Result<()> { /// The first result is the starting index for the next Type after this one. If it is /// equal to `elements.len()`, then this Type is the last one. /// The second result is the result Type. -fn from_thrift_helper(elements: &[SchemaElement], index: usize) -> Result<(usize, TypePtr)> { +fn from_thrift_helper( + elements: &[crate::format::SchemaElement], + index: usize, +) -> Result<(usize, TypePtr)> { // Whether or not the current node is root (message type). // There is only one message type node in the schema tree. let is_root_node = index == 0; @@ -1313,18 +1315,18 @@ fn from_thrift_helper(elements: &[SchemaElement], index: usize) -> Result<(usize } /// Method to convert to Thrift. -pub fn to_thrift(schema: &Type) -> Result> { +pub fn to_thrift(schema: &Type) -> Result> { if !schema.is_group() { return Err(general_err!("Root schema must be Group type")); } - let mut elements: Vec = Vec::new(); + let mut elements: Vec = Vec::new(); to_thrift_helper(schema, &mut elements); Ok(elements) } /// Constructs list of `SchemaElement` from the schema using depth-first traversal. /// Here we assume that schema is always valid and starts with group type. -fn to_thrift_helper(schema: &Type, elements: &mut Vec) { +fn to_thrift_helper(schema: &Type, elements: &mut Vec) { match *schema { Type::PrimitiveType { ref basic_info, @@ -1333,7 +1335,7 @@ fn to_thrift_helper(schema: &Type, elements: &mut Vec) { scale, precision, } => { - let element = SchemaElement { + let element = crate::format::SchemaElement { type_: Some(physical_type.into()), type_length: if type_length >= 0 { Some(type_length) @@ -1370,7 +1372,7 @@ fn to_thrift_helper(schema: &Type, elements: &mut Vec) { None }; - let element = SchemaElement { + let element = crate::format::SchemaElement { type_: None, type_length: None, repetition_type: repetition, diff --git a/parquet/src/thrift.rs b/parquet/src/thrift.rs index fc391abe87d7..984ba43ec7ad 100644 --- a/parquet/src/thrift.rs +++ b/parquet/src/thrift.rs @@ -33,12 +33,18 @@ pub trait TSerializable: Sized { fn write_to_out_protocol(&self, o_prot: &mut T) -> thrift::Result<()>; } -/// Public function to aid benchmarking. +/// Public function to aid benchmarking. Reads Parquet `FileMetaData` encoded in `bytes`. 
pub fn bench_file_metadata(bytes: &bytes::Bytes) { let mut input = TCompactSliceInputProtocol::new(bytes); crate::format::FileMetaData::read_from_in_protocol(&mut input).unwrap(); } +/// Public function to aid benchmarking. Reads Parquet `PageHeader` encoded in `bytes`. +pub fn bench_page_header(bytes: &bytes::Bytes) { + let mut input = TCompactSliceInputProtocol::new(bytes); + crate::format::PageHeader::read_from_in_protocol(&mut input).unwrap(); +} + /// A more performant implementation of [`TCompactInputProtocol`] that reads a slice /// /// [`TCompactInputProtocol`]: thrift::protocol::TCompactInputProtocol @@ -323,7 +329,6 @@ fn eof_error() -> thrift::Error { #[cfg(test)] mod tests { - use crate::format::{BoundaryOrder, ColumnIndex}; use crate::thrift::{TCompactSliceInputProtocol, TSerializable}; #[test] @@ -334,12 +339,12 @@ mod tests { let bytes = vec![0x19, 0x21, 2, 1, 0x19, 8, 0x19, 8, 0x15, 0, 0]; let mut protocol = TCompactSliceInputProtocol::new(bytes.as_slice()); - let index = ColumnIndex::read_from_in_protocol(&mut protocol).unwrap(); - let expected = ColumnIndex { + let index = crate::format::ColumnIndex::read_from_in_protocol(&mut protocol).unwrap(); + let expected = crate::format::ColumnIndex { null_pages: vec![false, true], min_values: vec![], max_values: vec![], - boundary_order: BoundaryOrder::UNORDERED, + boundary_order: crate::format::BoundaryOrder::UNORDERED, null_counts: None, repetition_level_histograms: None, definition_level_histograms: None, @@ -355,12 +360,12 @@ mod tests { let bytes = vec![0x19, 0x22, 0, 1, 0x19, 8, 0x19, 8, 0x15, 0, 0]; let mut protocol = TCompactSliceInputProtocol::new(bytes.as_slice()); - let index = ColumnIndex::read_from_in_protocol(&mut protocol).unwrap(); - let expected = ColumnIndex { + let index = crate::format::ColumnIndex::read_from_in_protocol(&mut protocol).unwrap(); + let expected = crate::format::ColumnIndex { null_pages: vec![false, true], min_values: vec![], max_values: vec![], - boundary_order: BoundaryOrder::UNORDERED, + boundary_order: crate::format::BoundaryOrder::UNORDERED, null_counts: None, repetition_level_histograms: None, definition_level_histograms: None, From 50f33231b200d9456de5e73e8a3c8aea51312854 Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Mon, 11 Aug 2025 15:23:54 -0700 Subject: [PATCH 02/15] [thrift-remodel] Redo thrift enums and unions (#8072) # Which issue does this PR close? **Note: this targets a feature branch, not main** - Part of #5854. - Related to #6129 # Rationale for this change Next step. # What changes are included in this PR? Attempt to make use of macros originally developed by @jhorstmann to implement some thrift enums and unions in `basic.rs`. Also adds yet another spin on a thrift decoder based heavily on `TCompactSliceInputProtocol`. The current approach is to use this new decoder with `TryFrom` impls on the data structures in question. This PR does not yet complete the process far enough to parse a parquet footer, but I'm putting it out here to get early feedback on the design. Some structures already deviate enough from their thrift counterparts that the macro based parsing is not practical. Likewise, the macro approach doesn't work for structs that need lifetime annotations (necessary for reading binary fields as slices). # Are these changes tested? Not yet. # Are there any user-facing changes? Yes. 
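For reviewers, here is a condensed sketch (not part of the diff itself) of the decoding pattern involved. It mirrors the hand-written `TryFrom` impls in this PR (see `ConvertedType` and `Repetition` in `basic.rs`) and is roughly what `thrift_enum!` expands to; `PageType` and its values are taken from the IDL pasted into the macro below:

```rust
// Sketch only: what thrift_enum! conceptually generates for an enum.
// A thrift enum arrives on the wire as a varint-encoded i32; values
// outside the known range are rejected rather than silently defaulted.
impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for PageType {
    type Error = ParquetError;

    fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self, Self::Error> {
        let val = prot.read_i32()?;
        Ok(match val {
            0 => Self::DATA_PAGE,
            1 => Self::INDEX_PAGE,
            2 => Self::DICTIONARY_PAGE,
            3 => Self::DATA_PAGE_V2,
            _ => return Err(general_err!("Unexpected PageType {}", val)),
        })
    }
}
```

Unions whose variants carry no payload (e.g. `TimeUnit`, handled by `thrift_union_all_empty!`) follow the same shape, except that the field id rather than an i32 value selects the variant and the empty payload struct is skipped.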
--- parquet/src/basic.rs | 940 +++++++++++++++---------- parquet/src/lib.rs | 2 + parquet/src/parquet_macros.rs | 264 +++++++ parquet/src/parquet_thrift.rs | 415 +++++++++++ parquet/src/schema/printer.rs | 11 +- parquet/tests/arrow_reader/bad_data.rs | 2 +- 6 files changed, 1243 insertions(+), 391 deletions(-) create mode 100644 parquet/src/parquet_macros.rs create mode 100644 parquet/src/parquet_thrift.rs diff --git a/parquet/src/basic.rs b/parquet/src/basic.rs index de53c57ce53a..92d70a933a03 100644 --- a/parquet/src/basic.rs +++ b/parquet/src/basic.rs @@ -24,6 +24,8 @@ use std::str::FromStr; use std::{fmt, str}; pub use crate::compression::{BrotliLevel, GzipLevel, ZstdLevel}; +use crate::parquet_thrift::{FieldType, ThriftCompactInputProtocol}; +use crate::{thrift_enum, thrift_private_struct, thrift_union_all_empty}; use crate::errors::{ParquetError, Result}; @@ -33,35 +35,29 @@ use crate::errors::{ParquetError, Result}; // ---------------------------------------------------------------------- // Mirrors thrift enum `crate::format::Type` +thrift_enum!( /// Types supported by Parquet. /// /// These physical types are intended to be used in combination with the encodings to /// control the on disk storage format. /// For example INT16 is not included as a type since a good encoding of INT32 /// would handle this. -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -#[allow(non_camel_case_types)] -pub enum Type { - /// A boolean value. - BOOLEAN, - /// 32-bit signed integer. - INT32, - /// 64-bit signed integer. - INT64, - /// 96-bit signed integer for timestamps. - INT96, - /// IEEE 754 single-precision floating point value. - FLOAT, - /// IEEE 754 double-precision floating point value. - DOUBLE, - /// Arbitrary length byte array. - BYTE_ARRAY, - /// Fixed length byte array. - FIXED_LEN_BYTE_ARRAY, +enum Type { + BOOLEAN = 0; + INT32 = 1; + INT64 = 2; + INT96 = 3; // deprecated, only used by legacy implementations. + FLOAT = 4; + DOUBLE = 5; + BYTE_ARRAY = 6; + FIXED_LEN_BYTE_ARRAY = 7; } +); // ---------------------------------------------------------------------- // Mirrors thrift enum `crate::format::ConvertedType` +// +// Cannot use macros because of added field `None` /// Common types (converted types) used by frameworks when using Parquet. /// @@ -165,23 +161,151 @@ pub enum ConvertedType { INTERVAL, } +impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for ConvertedType { + type Error = ParquetError; + fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { + let val = prot.read_i32()?; + Ok(match val { + 0 => Self::UTF8, + 1 => Self::MAP, + 2 => Self::MAP_KEY_VALUE, + 3 => Self::LIST, + 4 => Self::ENUM, + 5 => Self::DECIMAL, + 6 => Self::DATE, + 7 => Self::TIME_MILLIS, + 8 => Self::TIME_MICROS, + 9 => Self::TIMESTAMP_MILLIS, + 10 => Self::TIMESTAMP_MICROS, + 11 => Self::UINT_8, + 12 => Self::UINT_16, + 13 => Self::UINT_32, + 14 => Self::UINT_64, + 15 => Self::INT_8, + 16 => Self::INT_16, + 17 => Self::INT_32, + 18 => Self::INT_64, + 19 => Self::JSON, + 20 => Self::BSON, + 21 => Self::INTERVAL, + _ => return Err(general_err!("Unexpected ConvertedType {}", val)), + }) + } +} + // ---------------------------------------------------------------------- // Mirrors thrift union `crate::format::TimeUnit` +thrift_union_all_empty!( /// Time unit for `Time` and `Timestamp` logical types. -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum TimeUnit { - /// Milliseconds. - MILLIS, - /// Microseconds. - MICROS, - /// Nanoseconds. 
- NANOS, +union TimeUnit { + 1: MilliSeconds MILLIS + 2: MicroSeconds MICROS + 3: NanoSeconds NANOS } +); // ---------------------------------------------------------------------- // Mirrors thrift union `crate::format::LogicalType` +// private structs for decoding logical type + +thrift_private_struct!( +struct DecimalType { + 1: required i32 scale + 2: required i32 precision +} +); + +thrift_private_struct!( +struct TimestampType { + 1: required bool is_adjusted_to_u_t_c + 2: required TimeUnit unit +} +); + +// TimeType and TimestampType are structurally identical +use TimestampType as TimeType; + +thrift_private_struct!( +struct IntType { + 1: required i8 bit_width + 2: required bool is_signed +} +); + +thrift_private_struct!( +struct VariantType { + // The version of the variant specification that the variant was + // written with. + 1: optional i8 specification_version +} +); + +// TODO: need a macro for structs that require lifetime annotations +struct GeometryType<'a> { + crs: Option<&'a str>, +} + +impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for GeometryType<'a> { + type Error = ParquetError; + fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self, Self::Error> { + let mut crs: Option<&str> = None; + prot.read_struct_begin()?; + loop { + let field_ident = prot.read_field_begin()?; + if field_ident.field_type == FieldType::Stop { + break; + } + match field_ident.id { + 1 => { + let val = prot.read_string()?; + crs = Some(val); + } + _ => { + prot.skip(field_ident.field_type)?; + } + }; + } + Ok(Self { crs }) + } +} + +struct GeographyType<'a> { + crs: Option<&'a str>, + algorithm: Option<EdgeInterpolationAlgorithm>, +} + +impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for GeographyType<'a> { + type Error = ParquetError; + fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self, Self::Error> { + let mut crs: Option<&str> = None; + let mut algorithm: Option<EdgeInterpolationAlgorithm> = None; + prot.read_struct_begin()?; + loop { + let field_ident = prot.read_field_begin()?; + if field_ident.field_type == FieldType::Stop { + break; + } + match field_ident.id { + 1 => { + let val = prot.read_string()?; + crs = Some(val); + } + 2 => { + let val = EdgeInterpolationAlgorithm::try_from(&mut *prot)?; + algorithm = Some(val); + } + + _ => { + prot.skip(field_ident.field_type)?; + } + }; + } + Ok(Self { crs, algorithm }) + } +} + /// Logical types used by version 2.4.0+ of the Parquet format. /// /// This is an *entirely new* struct as of version @@ -238,15 +362,149 @@ pub enum LogicalType { /// A 16-bit floating point number. Float16, /// A Variant value. - Variant, + Variant { + /// The version of the variant specification that the variant was written with. + specification_version: Option<i8>, + }, /// A geospatial feature in the Well-Known Binary (WKB) format with linear/planar edges interpolation. - Geometry, + Geometry { + /// A custom CRS. If unset, this defaults to `OGC:CRS84`. + crs: Option<String>, + }, /// A geospatial feature in the WKB format with an explicit (non-linear/non-planar) edges interpolation. - Geography, + Geography { + /// A custom CRS. If unset, this defaults to `OGC:CRS84`. + crs: Option<String>, + /// An optional algorithm can be set to correctly interpret the edge interpolation + /// of the geometries. If unset, the algorithm defaults to `SPHERICAL`. + algorithm: Option<EdgeInterpolationAlgorithm>, + }, + /// For forward compatibility; used when an unknown union value is encountered. + _Unknown { + /// The field id encountered when parsing the unknown logical type.
+ field_id: i16, + }, +} + +impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for LogicalType { + type Error = ParquetError; + fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { + prot.read_struct_begin()?; + + let field_ident = prot.read_field_begin()?; + if field_ident.field_type == FieldType::Stop { + return Err(general_err!("received empty union from remote LogicalType")); + } + let ret = match field_ident.id { + 1 => { + prot.skip_empty_struct()?; + Self::String + } + 2 => { + prot.skip_empty_struct()?; + Self::Map + } + 3 => { + prot.skip_empty_struct()?; + Self::List + } + 4 => { + prot.skip_empty_struct()?; + Self::Enum + } + 5 => { + let val = DecimalType::try_from(&mut *prot)?; + Self::Decimal { + scale: val.scale, + precision: val.precision, + } + } + 6 => { + prot.skip_empty_struct()?; + Self::Date + } + 7 => { + let val = TimeType::try_from(&mut *prot)?; + Self::Time { + is_adjusted_to_u_t_c: val.is_adjusted_to_u_t_c, + unit: val.unit, + } + } + 8 => { + let val = TimestampType::try_from(&mut *prot)?; + Self::Timestamp { + is_adjusted_to_u_t_c: val.is_adjusted_to_u_t_c, + unit: val.unit, + } + } + 10 => { + let val = IntType::try_from(&mut *prot)?; + Self::Integer { + is_signed: val.is_signed, + bit_width: val.bit_width, + } + } + 11 => { + prot.skip_empty_struct()?; + Self::Unknown + } + 12 => { + prot.skip_empty_struct()?; + Self::Json + } + 13 => { + prot.skip_empty_struct()?; + Self::Bson + } + 14 => { + prot.skip_empty_struct()?; + Self::Uuid + } + 15 => { + prot.skip_empty_struct()?; + Self::Float16 + } + 16 => { + let val = VariantType::try_from(&mut *prot)?; + Self::Variant { + specification_version: val.specification_version, + } + } + 17 => { + let val = GeometryType::try_from(&mut *prot)?; + Self::Geometry { + crs: val.crs.map(|s| s.to_owned()), + } + } + 18 => { + let val = GeographyType::try_from(&mut *prot)?; + Self::Geography { + crs: val.crs.map(|s| s.to_owned()), + algorithm: val.algorithm, + } + } + _ => { + prot.skip(field_ident.field_type)?; + Self::_Unknown { + field_id: field_ident.id, + } + } + }; + let field_ident = prot.read_field_begin()?; + if field_ident.field_type != FieldType::Stop { + return Err(general_err!( + "Received multiple fields for union from remote LogicalType" + )); + } + prot.read_struct_end()?; + Ok(ret) + } } // ---------------------------------------------------------------------- // Mirrors thrift enum `crate::format::FieldRepetitionType` +// +// Cannot use macro since the name is changed /// Representation of field types in schema. #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -260,9 +518,23 @@ pub enum Repetition { REPEATED, } +impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for Repetition { + type Error = ParquetError; + fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { + let val = prot.read_i32()?; + Ok(match val { + 0 => Self::REQUIRED, + 1 => Self::OPTIONAL, + 2 => Self::REPEATED, + _ => return Err(general_err!("Unexpected FieldRepetitionType {}", val)), + }) + } +} + // ---------------------------------------------------------------------- // Mirrors thrift enum `crate::format::Encoding` +thrift_enum!( /// Encodings supported by Parquet. /// /// Not all encodings are valid for all types. These enums are also used to specify the @@ -279,80 +551,19 @@ pub enum Repetition { /// performance impact when evaluating these encodings. 
/// /// [WriterVersion]: crate::file::properties::WriterVersion -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)] -#[allow(non_camel_case_types)] -pub enum Encoding { - /// Default byte encoding. - /// - BOOLEAN - 1 bit per value, 0 is false; 1 is true. - /// - INT32 - 4 bytes per value, stored as little-endian. - /// - INT64 - 8 bytes per value, stored as little-endian. - /// - FLOAT - 4 bytes per value, stored as little-endian. - /// - DOUBLE - 8 bytes per value, stored as little-endian. - /// - BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes. - /// - FIXED_LEN_BYTE_ARRAY - just the bytes are stored. - PLAIN, - - /// **Deprecated** dictionary encoding. - /// - /// The values in the dictionary are encoded using PLAIN encoding. - /// Since it is deprecated, RLE_DICTIONARY encoding is used for a data page, and - /// PLAIN encoding is used for dictionary page. - PLAIN_DICTIONARY, - - /// Group packed run length encoding. - /// - /// Usable for definition/repetition levels encoding and boolean values. - RLE, - - /// **Deprecated** Bit-packed encoding. - /// - /// This can only be used if the data has a known max width. - /// Usable for definition/repetition levels encoding. - /// - /// There are compatibility issues with files using this encoding. - /// The parquet standard specifies the bits to be packed starting from the - /// most-significant bit, several implementations do not follow this bit order. - /// Several other implementations also have issues reading this encoding - /// because of incorrect assumptions about the length of the encoded data. - /// - /// The RLE/bit-packing hybrid is more cpu and memory efficient and should be used instead. - #[deprecated( - since = "51.0.0", - note = "Please see documentation for compatibility issues and use the RLE/bit-packing hybrid encoding instead" - )] - BIT_PACKED, - - /// Delta encoding for integers, either INT32 or INT64. - /// - /// Works best on sorted data. - DELTA_BINARY_PACKED, - - /// Encoding for byte arrays to separate the length values and the data. - /// - /// The lengths are encoded using DELTA_BINARY_PACKED encoding. - DELTA_LENGTH_BYTE_ARRAY, - - /// Incremental encoding for byte arrays. - /// - /// Prefix lengths are encoded using DELTA_BINARY_PACKED encoding. - /// Suffixes are stored using DELTA_LENGTH_BYTE_ARRAY encoding. - DELTA_BYTE_ARRAY, - - /// Dictionary encoding. - /// - /// The ids are encoded using the RLE encoding. - RLE_DICTIONARY, - - /// Encoding for fixed-width data. - /// - /// K byte-streams are created where K is the size in bytes of the data type. - /// The individual bytes of a value are scattered to the corresponding stream and - /// the streams are concatenated. - /// This itself does not reduce the size of the data but can lead to better compression - /// afterwards. Note that the use of this encoding with FIXED_LEN_BYTE_ARRAY(N) data may - /// perform poorly for large values of N. 
- BYTE_STREAM_SPLIT, +enum Encoding { + PLAIN = 0; + // GROUP_VAR_INT = 1; + PLAIN_DICTIONARY = 2; + RLE = 3; + BIT_PACKED = 4; + DELTA_BINARY_PACKED = 5; + DELTA_LENGTH_BYTE_ARRAY = 6; + DELTA_BYTE_ARRAY = 7; + RLE_DICTIONARY = 8; + BYTE_STREAM_SPLIT = 9; } +); impl FromStr for Encoding { type Err = ParquetError; @@ -415,6 +626,24 @@ pub enum Compression { LZ4_RAW, } +impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for Compression { + type Error = ParquetError; + fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { + let val = prot.read_i32()?; + Ok(match val { + 0 => Self::UNCOMPRESSED, + 1 => Self::SNAPPY, + 2 => Self::GZIP(Default::default()), + 3 => Self::LZO, + 4 => Self::BROTLI(Default::default()), + 5 => Self::LZ4, + 6 => Self::ZSTD(Default::default()), + 7 => Self::LZ4_RAW, + _ => return Err(general_err!("Unexpected CompressionCodec {}", val)), + }) + } +} + impl Compression { /// Returns the codec type of this compression setting as a string, without the compression /// level. @@ -506,69 +735,78 @@ impl FromStr for Compression { } // ---------------------------------------------------------------------- -/// Mirrors thrift enum `crate::format::PageType` -/// +// Mirrors thrift enum `crate::format::PageType` + +thrift_enum!( /// Available data pages for Parquet file format. /// Note that some of the page types may not be supported. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -#[allow(non_camel_case_types)] -pub enum PageType { - /// Data page Parquet 1.0 - DATA_PAGE, - /// Index page - INDEX_PAGE, - /// Dictionary page - DICTIONARY_PAGE, - /// Data page Parquet 2.0 - DATA_PAGE_V2, +enum PageType { + DATA_PAGE = 0; + INDEX_PAGE = 1; + DICTIONARY_PAGE = 2; + DATA_PAGE_V2 = 3; } +); // ---------------------------------------------------------------------- // Mirrors thrift enum `crate::format::BoundaryOrder` +thrift_enum!( /// Enum to annotate whether lists of min/max elements inside ColumnIndex /// are ordered and if so, in which direction. -#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] -pub enum BoundaryOrder { - /// Min/max stats are unordered. - UNORDERED, - /// Min/max stats are ordered in an ascending fashion. - ASCENDING, - /// Min/max stats are ordered in an descending fashion. - DESCENDING, +enum BoundaryOrder { + UNORDERED = 0; + ASCENDING = 1; + DESCENDING = 2; +} +); + +// ---------------------------------------------------------------------- +// Mirrors thrift enum `crate::format::EdgeInterpolationAlgorithm` + +thrift_enum!( +/// Edge interpolation algorithm for Geography logical type +enum EdgeInterpolationAlgorithm { + SPHERICAL = 0; + VINCENTY = 1; + THOMAS = 2; + ANDOYER = 3; + KARNEY = 4; } +); // ---------------------------------------------------------------------- // Mirrors thrift union `crate::format::BloomFilterAlgorithm` +thrift_union_all_empty!( /// The algorithm used in Bloom filter. -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum BloomFilterAlgorithm { - /// Block-based Bloom filter. - BLOCK, +union BloomFilterAlgorithm { + /** Block-based Bloom filter. **/ + 1: SplitBlockAlgorithm BLOCK; } +); // ---------------------------------------------------------------------- // Mirrors thrift union `crate::format::BloomFilterHash` +thrift_union_all_empty!( /// The hash function used in Bloom filter. This function takes the hash of a column value /// using plain encoding. -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum BloomFilterHash { - /// xxHash is an extremely fast non-cryptographic hash algorithm. 
It uses 64 bits version - /// of xxHash. - XXHASH, +union BloomFilterHash { + /** xxHash Strategy. **/ + 1: XxHash XXHASH; } +); // ---------------------------------------------------------------------- // Mirrors thrift union `crate::format::BloomFilterCompression` +thrift_union_all_empty!( /// The compression used in the Bloom filter. -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum BloomFilterCompression { - /// No compression is used. - UNCOMPRESSED, +union BloomFilterCompression { + 1: Uncompressed UNCOMPRESSED; } +); // ---------------------------------------------------------------------- // Mirrors thrift union `crate::format::ColumnOrder` @@ -610,9 +848,13 @@ pub enum ColumnOrder { /// Column uses the order defined by its logical or physical type /// (if there is no logical type), parquet-format 2.4.0+. TYPE_DEFINED_ORDER(SortOrder), + // The following are not defined in the Parquet spec and should always be last. /// Undefined column order, means legacy behaviour before parquet-format 2.4.0. /// Sort order is always SIGNED. UNDEFINED, + /// An unknown but present ColumnOrder. Statistics with an unknown `ColumnOrder` + /// will be ignored. + UNKNOWN, } impl ColumnOrder { @@ -640,9 +882,10 @@ impl ColumnOrder { LogicalType::Unknown => SortOrder::UNDEFINED, LogicalType::Uuid => SortOrder::UNSIGNED, LogicalType::Float16 => SortOrder::SIGNED, - LogicalType::Variant | LogicalType::Geometry | LogicalType::Geography => { - SortOrder::UNDEFINED - } + LogicalType::Variant { .. } + | LogicalType::Geometry { .. } + | LogicalType::Geography { .. } + | LogicalType::_Unknown { .. } => SortOrder::UNDEFINED, }, // Fall back to converted type None => Self::get_converted_sort_order(converted_type, physical_type), @@ -712,19 +955,45 @@ impl ColumnOrder { match *self { ColumnOrder::TYPE_DEFINED_ORDER(order) => order, ColumnOrder::UNDEFINED => SortOrder::SIGNED, + ColumnOrder::UNKNOWN => SortOrder::UNDEFINED, } } } -// ---------------------------------------------------------------------- -// Display handlers +impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for ColumnOrder { + type Error = ParquetError; -impl fmt::Display for Type { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{self:?}") + fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { + prot.read_struct_begin()?; + let field_ident = prot.read_field_begin()?; + if field_ident.field_type == FieldType::Stop { + return Err(general_err!("Received empty union from remote ColumnOrder")); + } + let ret = match field_ident.id { + 1 => { + // TODO: the sort order needs to be set correctly after parsing. 
+ prot.skip_empty_struct()?; + Self::TYPE_DEFINED_ORDER(SortOrder::SIGNED) + } + _ => { + prot.skip(field_ident.field_type)?; + Self::UNKNOWN + } + }; + let field_ident = prot.read_field_begin()?; + if field_ident.field_type != FieldType::Stop { + return Err(general_err!( + "Received multiple fields for union from remote ColumnOrder" + )); + } + prot.read_struct_end()?; + Ok(ret) } } +// ---------------------------------------------------------------------- +// Display handlers + impl fmt::Display for ConvertedType { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{self:?}") @@ -737,24 +1006,12 @@ impl fmt::Display for Repetition { } } -impl fmt::Display for Encoding { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{self:?}") - } -} - impl fmt::Display for Compression { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{self:?}") } } -impl fmt::Display for PageType { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{self:?}") - } -} - impl fmt::Display for SortOrder { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{self:?}") @@ -767,42 +1024,6 @@ impl fmt::Display for ColumnOrder { } } -// ---------------------------------------------------------------------- -// crate::format::Type <=> Type conversion - -impl TryFrom for Type { - type Error = ParquetError; - - fn try_from(value: crate::format::Type) -> Result { - Ok(match value { - crate::format::Type::BOOLEAN => Type::BOOLEAN, - crate::format::Type::INT32 => Type::INT32, - crate::format::Type::INT64 => Type::INT64, - crate::format::Type::INT96 => Type::INT96, - crate::format::Type::FLOAT => Type::FLOAT, - crate::format::Type::DOUBLE => Type::DOUBLE, - crate::format::Type::BYTE_ARRAY => Type::BYTE_ARRAY, - crate::format::Type::FIXED_LEN_BYTE_ARRAY => Type::FIXED_LEN_BYTE_ARRAY, - _ => return Err(general_err!("unexpected parquet type: {}", value.0)), - }) - } -} - -impl From for crate::format::Type { - fn from(value: Type) -> Self { - match value { - Type::BOOLEAN => crate::format::Type::BOOLEAN, - Type::INT32 => crate::format::Type::INT32, - Type::INT64 => crate::format::Type::INT64, - Type::INT96 => crate::format::Type::INT96, - Type::FLOAT => crate::format::Type::FLOAT, - Type::DOUBLE => crate::format::Type::DOUBLE, - Type::BYTE_ARRAY => crate::format::Type::BYTE_ARRAY, - Type::FIXED_LEN_BYTE_ARRAY => crate::format::Type::FIXED_LEN_BYTE_ARRAY, - } - } -} - // ---------------------------------------------------------------------- // crate::format::ConvertedType <=> ConvertedType conversion @@ -876,92 +1097,6 @@ impl From for Option { } } -// ---------------------------------------------------------------------- -// crate::format::BloomFilterHash <=> BloomFilterHash conversion - -impl From for BloomFilterHash { - fn from(value: crate::format::BloomFilterHash) -> Self { - match value { - crate::format::BloomFilterHash::XXHASH(_) => BloomFilterHash::XXHASH, - } - } -} - -impl From for crate::format::BloomFilterHash { - fn from(value: BloomFilterHash) -> Self { - match value { - BloomFilterHash::XXHASH => crate::format::BloomFilterHash::XXHASH(Default::default()), - } - } -} - -// ---------------------------------------------------------------------- -// crate::format::BloomFilterAlgorithm <=> BloomFilterAlgorithm conversion - -impl From for BloomFilterAlgorithm { - fn from(value: crate::format::BloomFilterAlgorithm) -> Self { - match value { - crate::format::BloomFilterAlgorithm::BLOCK(_) => BloomFilterAlgorithm::BLOCK, - } - } -} - -impl From 
for crate::format::BloomFilterAlgorithm { - fn from(value: BloomFilterAlgorithm) -> Self { - match value { - BloomFilterAlgorithm::BLOCK => { - crate::format::BloomFilterAlgorithm::BLOCK(Default::default()) - } - } - } -} - -// ---------------------------------------------------------------------- -// crate::format::BloomFilterCompression <=> BloomFilterCompression conversion - -impl From for BloomFilterCompression { - fn from(value: crate::format::BloomFilterCompression) -> Self { - match value { - crate::format::BloomFilterCompression::UNCOMPRESSED(_) => { - BloomFilterCompression::UNCOMPRESSED - } - } - } -} - -impl From for crate::format::BloomFilterCompression { - fn from(value: BloomFilterCompression) -> Self { - match value { - BloomFilterCompression::UNCOMPRESSED => { - crate::format::BloomFilterCompression::UNCOMPRESSED(Default::default()) - } - } - } -} - -// ---------------------------------------------------------------------- -// crate::format::TimeUnit <=> TimeUnit conversion - -impl From for TimeUnit { - fn from(value: crate::format::TimeUnit) -> Self { - match value { - crate::format::TimeUnit::MILLIS(_) => TimeUnit::MILLIS, - crate::format::TimeUnit::MICROS(_) => TimeUnit::MICROS, - crate::format::TimeUnit::NANOS(_) => TimeUnit::NANOS, - } - } -} - -impl From for crate::format::TimeUnit { - fn from(value: TimeUnit) -> Self { - match value { - TimeUnit::MILLIS => crate::format::TimeUnit::MILLIS(crate::format::MilliSeconds {}), - TimeUnit::MICROS => crate::format::TimeUnit::MICROS(crate::format::MicroSeconds {}), - TimeUnit::NANOS => crate::format::TimeUnit::NANOS(crate::format::NanoSeconds {}), - } - } -} - // ---------------------------------------------------------------------- // crate::format::LogicalType <=> LogicalType conversion @@ -994,9 +1129,14 @@ impl From for LogicalType { crate::format::LogicalType::BSON(_) => LogicalType::Bson, crate::format::LogicalType::UUID(_) => LogicalType::Uuid, crate::format::LogicalType::FLOAT16(_) => LogicalType::Float16, - crate::format::LogicalType::VARIANT(_) => LogicalType::Variant, - crate::format::LogicalType::GEOMETRY(_) => LogicalType::Geometry, - crate::format::LogicalType::GEOGRAPHY(_) => LogicalType::Geography, + crate::format::LogicalType::VARIANT(vt) => LogicalType::Variant { + specification_version: vt.specification_version, + }, + crate::format::LogicalType::GEOMETRY(gt) => LogicalType::Geometry { crs: gt.crs }, + crate::format::LogicalType::GEOGRAPHY(gt) => LogicalType::Geography { + crs: gt.crs, + algorithm: gt.algorithm.map(|a| a.try_into().unwrap()), + }, } } } @@ -1038,9 +1178,23 @@ impl From for crate::format::LogicalType { LogicalType::Bson => crate::format::LogicalType::BSON(Default::default()), LogicalType::Uuid => crate::format::LogicalType::UUID(Default::default()), LogicalType::Float16 => crate::format::LogicalType::FLOAT16(Default::default()), - LogicalType::Variant => crate::format::LogicalType::VARIANT(Default::default()), - LogicalType::Geometry => crate::format::LogicalType::GEOMETRY(Default::default()), - LogicalType::Geography => crate::format::LogicalType::GEOGRAPHY(Default::default()), + LogicalType::Variant { + specification_version, + } => crate::format::LogicalType::VARIANT(crate::format::VariantType { + specification_version, + }), + LogicalType::Geometry { crs } => { + crate::format::LogicalType::GEOMETRY(crate::format::GeometryType { crs }) + } + LogicalType::Geography { crs, algorithm } => { + crate::format::LogicalType::GEOGRAPHY(crate::format::GeographyType { + crs, + algorithm: 
algorithm.map(|a| a.into()), + }) + } + LogicalType::_Unknown { .. } => { + panic!("Trying to convert unknown LogicalType to thrift"); + } } } } @@ -1092,9 +1246,10 @@ impl From> for ConvertedType { LogicalType::Bson => ConvertedType::BSON, LogicalType::Uuid | LogicalType::Float16 - | LogicalType::Variant - | LogicalType::Geometry - | LogicalType::Geography + | LogicalType::Variant { .. } + | LogicalType::Geometry { .. } + | LogicalType::Geography { .. } + | LogicalType::_Unknown { .. } | LogicalType::Unknown => ConvertedType::NONE, }, None => ConvertedType::NONE, @@ -1133,46 +1288,6 @@ impl From for crate::format::FieldRepetitionType { } } -// ---------------------------------------------------------------------- -// crate::format::Encoding <=> Encoding conversion - -impl TryFrom for Encoding { - type Error = ParquetError; - - fn try_from(value: crate::format::Encoding) -> Result { - Ok(match value { - crate::format::Encoding::PLAIN => Encoding::PLAIN, - crate::format::Encoding::PLAIN_DICTIONARY => Encoding::PLAIN_DICTIONARY, - crate::format::Encoding::RLE => Encoding::RLE, - #[allow(deprecated)] - crate::format::Encoding::BIT_PACKED => Encoding::BIT_PACKED, - crate::format::Encoding::DELTA_BINARY_PACKED => Encoding::DELTA_BINARY_PACKED, - crate::format::Encoding::DELTA_LENGTH_BYTE_ARRAY => Encoding::DELTA_LENGTH_BYTE_ARRAY, - crate::format::Encoding::DELTA_BYTE_ARRAY => Encoding::DELTA_BYTE_ARRAY, - crate::format::Encoding::RLE_DICTIONARY => Encoding::RLE_DICTIONARY, - crate::format::Encoding::BYTE_STREAM_SPLIT => Encoding::BYTE_STREAM_SPLIT, - _ => return Err(general_err!("unexpected parquet encoding: {}", value.0)), - }) - } -} - -impl From for crate::format::Encoding { - fn from(value: Encoding) -> Self { - match value { - Encoding::PLAIN => crate::format::Encoding::PLAIN, - Encoding::PLAIN_DICTIONARY => crate::format::Encoding::PLAIN_DICTIONARY, - Encoding::RLE => crate::format::Encoding::RLE, - #[allow(deprecated)] - Encoding::BIT_PACKED => crate::format::Encoding::BIT_PACKED, - Encoding::DELTA_BINARY_PACKED => crate::format::Encoding::DELTA_BINARY_PACKED, - Encoding::DELTA_LENGTH_BYTE_ARRAY => crate::format::Encoding::DELTA_LENGTH_BYTE_ARRAY, - Encoding::DELTA_BYTE_ARRAY => crate::format::Encoding::DELTA_BYTE_ARRAY, - Encoding::RLE_DICTIONARY => crate::format::Encoding::RLE_DICTIONARY, - Encoding::BYTE_STREAM_SPLIT => crate::format::Encoding::BYTE_STREAM_SPLIT, - } - } -} - // ---------------------------------------------------------------------- // crate::format::CompressionCodec <=> Compression conversion @@ -1214,65 +1329,6 @@ impl From for crate::format::CompressionCodec { } } -// ---------------------------------------------------------------------- -// crate::format::PageType <=> PageType conversion - -impl TryFrom for PageType { - type Error = ParquetError; - - fn try_from(value: crate::format::PageType) -> Result { - Ok(match value { - crate::format::PageType::DATA_PAGE => PageType::DATA_PAGE, - crate::format::PageType::INDEX_PAGE => PageType::INDEX_PAGE, - crate::format::PageType::DICTIONARY_PAGE => PageType::DICTIONARY_PAGE, - crate::format::PageType::DATA_PAGE_V2 => PageType::DATA_PAGE_V2, - _ => return Err(general_err!("unexpected parquet page type: {}", value.0)), - }) - } -} - -impl From for crate::format::PageType { - fn from(value: PageType) -> Self { - match value { - PageType::DATA_PAGE => crate::format::PageType::DATA_PAGE, - PageType::INDEX_PAGE => crate::format::PageType::INDEX_PAGE, - PageType::DICTIONARY_PAGE => crate::format::PageType::DICTIONARY_PAGE, - 
PageType::DATA_PAGE_V2 => crate::format::PageType::DATA_PAGE_V2, - } - } -} - -// ---------------------------------------------------------------------- -// crate::format::PageType <=> PageType conversion - -impl TryFrom for BoundaryOrder { - type Error = ParquetError; - - fn try_from(value: crate::format::BoundaryOrder) -> Result { - Ok(match value { - crate::format::BoundaryOrder::UNORDERED => BoundaryOrder::UNORDERED, - crate::format::BoundaryOrder::ASCENDING => BoundaryOrder::ASCENDING, - crate::format::BoundaryOrder::DESCENDING => BoundaryOrder::DESCENDING, - _ => { - return Err(general_err!( - "unexpected parquet boundary order type: {}", - value.0 - )) - } - }) - } -} - -impl From for crate::format::BoundaryOrder { - fn from(value: BoundaryOrder) -> Self { - match value { - BoundaryOrder::UNORDERED => crate::format::BoundaryOrder::UNORDERED, - BoundaryOrder::ASCENDING => crate::format::BoundaryOrder::ASCENDING, - BoundaryOrder::DESCENDING => crate::format::BoundaryOrder::DESCENDING, - } - } -} - // ---------------------------------------------------------------------- // String conversions for schema parsing. @@ -2633,4 +2689,114 @@ mod tests { "Parquet error: unknown encoding: gzip(-10)" ); } + + #[test] + fn test_display_boundary_order() { + assert_eq!(BoundaryOrder::ASCENDING.to_string(), "ASCENDING"); + assert_eq!(BoundaryOrder::DESCENDING.to_string(), "DESCENDING"); + assert_eq!(BoundaryOrder::UNORDERED.to_string(), "UNORDERED"); + } + + #[test] + fn test_from_boundary_order() { + assert_eq!( + BoundaryOrder::try_from(crate::format::BoundaryOrder::ASCENDING).unwrap(), + BoundaryOrder::ASCENDING + ); + assert_eq!( + BoundaryOrder::try_from(crate::format::BoundaryOrder::DESCENDING).unwrap(), + BoundaryOrder::DESCENDING + ); + assert_eq!( + BoundaryOrder::try_from(crate::format::BoundaryOrder::UNORDERED).unwrap(), + BoundaryOrder::UNORDERED + ); + } + + #[test] + fn test_into_boundary_order() { + assert_eq!( + crate::format::BoundaryOrder::ASCENDING, + BoundaryOrder::ASCENDING.into() + ); + assert_eq!( + crate::format::BoundaryOrder::DESCENDING, + BoundaryOrder::DESCENDING.into() + ); + assert_eq!( + crate::format::BoundaryOrder::UNORDERED, + BoundaryOrder::UNORDERED.into() + ); + } + + #[test] + fn test_display_edge_algo() { + assert_eq!( + EdgeInterpolationAlgorithm::SPHERICAL.to_string(), + "SPHERICAL" + ); + assert_eq!(EdgeInterpolationAlgorithm::VINCENTY.to_string(), "VINCENTY"); + assert_eq!(EdgeInterpolationAlgorithm::THOMAS.to_string(), "THOMAS"); + assert_eq!(EdgeInterpolationAlgorithm::ANDOYER.to_string(), "ANDOYER"); + assert_eq!(EdgeInterpolationAlgorithm::KARNEY.to_string(), "KARNEY"); + } + + #[test] + fn test_from_edge_algo() { + assert_eq!( + EdgeInterpolationAlgorithm::try_from( + crate::format::EdgeInterpolationAlgorithm::SPHERICAL + ) + .unwrap(), + EdgeInterpolationAlgorithm::SPHERICAL + ); + assert_eq!( + EdgeInterpolationAlgorithm::try_from( + crate::format::EdgeInterpolationAlgorithm::VINCENTY + ) + .unwrap(), + EdgeInterpolationAlgorithm::VINCENTY + ); + assert_eq!( + EdgeInterpolationAlgorithm::try_from(crate::format::EdgeInterpolationAlgorithm::THOMAS) + .unwrap(), + EdgeInterpolationAlgorithm::THOMAS + ); + assert_eq!( + EdgeInterpolationAlgorithm::try_from( + crate::format::EdgeInterpolationAlgorithm::ANDOYER + ) + .unwrap(), + EdgeInterpolationAlgorithm::ANDOYER + ); + assert_eq!( + EdgeInterpolationAlgorithm::try_from(crate::format::EdgeInterpolationAlgorithm::KARNEY) + .unwrap(), + EdgeInterpolationAlgorithm::KARNEY + ); + } + + #[test] + fn 
test_into_edge_algo() { + assert_eq!( + crate::format::EdgeInterpolationAlgorithm::SPHERICAL, + EdgeInterpolationAlgorithm::SPHERICAL.into() + ); + assert_eq!( + crate::format::EdgeInterpolationAlgorithm::VINCENTY, + EdgeInterpolationAlgorithm::VINCENTY.into() + ); + assert_eq!( + crate::format::EdgeInterpolationAlgorithm::THOMAS, + EdgeInterpolationAlgorithm::THOMAS.into() + ); + assert_eq!( + crate::format::EdgeInterpolationAlgorithm::ANDOYER, + EdgeInterpolationAlgorithm::ANDOYER.into() + ); + assert_eq!( + crate::format::EdgeInterpolationAlgorithm::KARNEY, + EdgeInterpolationAlgorithm::KARNEY.into() + ); + } } diff --git a/parquet/src/lib.rs b/parquet/src/lib.rs index 07a673c295bc..74182d265029 100644 --- a/parquet/src/lib.rs +++ b/parquet/src/lib.rs @@ -178,4 +178,6 @@ pub mod file; pub mod record; pub mod schema; +mod parquet_macros; +mod parquet_thrift; pub mod thrift; diff --git a/parquet/src/parquet_macros.rs b/parquet/src/parquet_macros.rs new file mode 100644 index 000000000000..40e148a75e72 --- /dev/null +++ b/parquet/src/parquet_macros.rs @@ -0,0 +1,264 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// These macros are adapted from Jörn Horstmann's thrift macros at +// https://github.com/jhorstmann/compact-thrift +// They allow for pasting sections of the Parquet thrift IDL file +// into a macro to generate rust structures and implementations. + +#[macro_export] +#[allow(clippy::crate_in_macro_def)] +/// macro to generate rust enums from a thrift enum definition +macro_rules! 
thrift_enum { + ($(#[$($def_attrs:tt)*])* enum $identifier:ident { $($(#[$($field_attrs:tt)*])* $field_name:ident = $field_value:literal;)* }) => { + $(#[$($def_attrs)*])* + #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] + #[allow(non_camel_case_types)] + #[allow(missing_docs)] + pub enum $identifier { + $($field_name = $field_value,)* + } + + impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for $identifier { + type Error = ParquetError; + fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { + let val = prot.read_i32()?; + match val { + $($field_value => Ok(Self::$field_name),)* + _ => Err(general_err!("Unexpected {} {}", stringify!($identifier), val)), + } + } + } + + impl fmt::Display for $identifier { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{self:?}") + } + } + + // TODO: remove when we finally get rid of the format module + impl TryFrom for $identifier { + type Error = ParquetError; + + fn try_from(value: crate::format::$identifier) -> Result { + Ok(match value { + $(crate::format::$identifier::$field_name => Self::$field_name,)* + _ => return Err(general_err!("Unexpected parquet {}: {}", stringify!($identifier), value.0)), + }) + } + } + + impl From<$identifier> for crate::format::$identifier { + fn from(value: $identifier) -> Self { + match value { + $($identifier::$field_name => Self::$field_name,)* + } + } + } + } +} + +#[macro_export] +#[allow(clippy::crate_in_macro_def)] +/// macro to generate rust enums for empty thrift structs used in unions +macro_rules! thrift_empty_struct { + ($identifier: ident) => { + #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] + pub struct $identifier {} + + impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for $identifier { + type Error = ParquetError; + fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { + prot.skip_empty_struct()?; + Ok(Self {}) + } + } + + // TODO: remove when we finally get rid of the format module + impl From for $identifier { + fn from(_: $crate::format::$identifier) -> Self { + Self {} + } + } + + impl From<$identifier> for crate::format::$identifier { + fn from(_: $identifier) -> Self { + Self {} + } + } + }; +} + +/// macro to generate rust enums for thrift unions where all fields are typed with empty structs +#[macro_export] +#[allow(clippy::crate_in_macro_def)] +macro_rules! thrift_union_all_empty { + ($(#[$($def_attrs:tt)*])* union $identifier:ident { $($(#[$($field_attrs:tt)*])* $field_id:literal : $field_type:ident $(< $element_type:ident >)? 
$field_name:ident $(;)?)* }) => { + $(#[cfg_attr(not(doctest), $($def_attrs)*)])* + #[derive(Clone, Copy, Debug, Eq, PartialEq)] + #[allow(non_camel_case_types)] + #[allow(non_snake_case)] + #[allow(missing_docs)] + pub enum $identifier { + $($(#[cfg_attr(not(doctest), $($field_attrs)*)])* $field_name),* + } + + impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for $identifier { + type Error = ParquetError; + + fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { + prot.read_struct_begin()?; + let field_ident = prot.read_field_begin()?; + if field_ident.field_type == FieldType::Stop { + return Err(general_err!("Received empty union from remote {}", stringify!($identifier))); + } + let ret = match field_ident.id { + $($field_id => { + prot.skip_empty_struct()?; + Self::$field_name + } + )* + _ => { + return Err(general_err!("Unexpected {} {}", stringify!($identifier), field_ident.id)); + } + }; + let field_ident = prot.read_field_begin()?; + if field_ident.field_type != FieldType::Stop { + return Err(general_err!( + "Received multiple fields for union from remote {}", stringify!($identifier) + )); + } + prot.read_struct_end()?; + Ok(ret) + } + } + + // TODO: remove when we finally get rid of the format module + impl From for $identifier { + fn from(value: crate::format::$identifier) -> Self { + match value { + $(crate::format::$identifier::$field_name(_) => Self::$field_name,)* + } + } + } + + impl From<$identifier> for crate::format::$identifier { + fn from(value: $identifier) -> Self { + match value { + $($identifier::$field_name => Self::$field_name(Default::default()),)* + } + } + } + } +} + +/// macro to generate rust structs from a thrift struct definition +#[macro_export] +macro_rules! thrift_private_struct { + ($(#[$($def_attrs:tt)*])* struct $identifier:ident { $($(#[$($field_attrs:tt)*])* $field_id:literal : $required_or_optional:ident $field_type:ident $(< $element_type:ident >)? $field_name:ident $(= $default_value:literal)? $(;)?)* }) => { + $(#[cfg_attr(not(doctest), $($def_attrs)*)])* + #[derive(Clone, Debug, PartialEq)] + #[allow(non_camel_case_types)] + #[allow(non_snake_case)] + struct $identifier { + $($(#[cfg_attr(not(doctest), $($field_attrs)*)])* pub $field_name: $crate::__thrift_required_or_optional!($required_or_optional $crate::__thrift_field_type!($field_type $($element_type)?))),* + } + + impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for $identifier { + type Error = ParquetError; + fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { + $(let mut $field_name: Option<$field_type> = None;)* + prot.read_struct_begin()?; + loop { + let field_ident = prot.read_field_begin()?; + if field_ident.field_type == FieldType::Stop { + break; + } + match field_ident.id { + $($field_id => { + let val = $crate::__thrift_read_field!(prot $field_type); + $field_name = Some(val); + })* + _ => { + prot.skip(field_ident.field_type)?; + } + }; + } + Ok(Self { + $($field_name: $crate::__thrift_result_required_or_optional!($required_or_optional $field_name)),* + }) + } + } + } +} + +#[doc(hidden)] +#[macro_export] +macro_rules! __thrift_required_or_optional { + (required $field_type:ty) => { $field_type }; + (optional $field_type:ty) => { Option<$field_type> }; +} + +#[doc(hidden)] +#[macro_export] +macro_rules! 
__thrift_result_required_or_optional {
+    (required $field_name:ident) => {
+        $field_name.expect(&format!(
+            "Required field {} not present",
+            stringify!($field_name)
+        ))
+    };
+    (optional $field_name:ident) => {
+        $field_name
+    };
+}
+
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __thrift_read_field {
+    ($prot:tt bool) => {
+        $prot.read_bool()?
+    };
+    ($prot:tt i8) => {
+        $prot.read_i8()?
+    };
+    ($prot:tt i32) => {
+        $prot.read_i32()?
+    };
+    ($prot:tt i64) => {
+        $prot.read_i64()?
+    };
+    ($prot:tt string) => {
+        $prot.read_string()?
+    };
+    ($prot:tt $field_type:ident) => {
+        $field_type::try_from(&mut *$prot)?
+    };
+}
+
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __thrift_field_type {
+    (list $element_type:ident) => { Vec< $crate::__thrift_field_type!($element_type) > };
+    (set $element_type:ident) => { Vec< $crate::__thrift_field_type!($element_type) > };
+    (binary) => { Vec<u8> };
+    (string) => { String };
+    ($field_type:ty) => { $field_type };
+    (Box $element_type:ident) => { std::boxed::Box< $crate::__thrift_field_type!($element_type) > };
+    (Rc $element_type:ident) => { std::rc::Rc< $crate::__thrift_field_type!($element_type) > };
+    (Arc $element_type:ident) => { std::sync::Arc< $crate::__thrift_field_type!($element_type) > };
+}
diff --git a/parquet/src/parquet_thrift.rs b/parquet/src/parquet_thrift.rs
new file mode 100644
index 000000000000..2514f3fc70ff
--- /dev/null
+++ b/parquet/src/parquet_thrift.rs
@@ -0,0 +1,415 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Experimental replacement for the thrift decoder.
+// This is a copy of TCompactSliceInputProtocol, but modified
+// to not allocate byte arrays or strings.
+#![allow(dead_code)]
+
+use crate::errors::{ParquetError, Result};
+
+// Thrift compact protocol types for struct fields.
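+//
+// As a sketch of the wire layout these types decode (assuming the standard
+// thrift compact protocol): each struct field starts with a header byte whose
+// low nibble is one of the type codes below and whose high nibble is the
+// delta from the previous field id. For example, 0x15 introduces field
+// `previous id + 1` with type 5 (I32), while 0x00 is the Stop marker that
+// terminates a struct.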
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub(crate) enum FieldType {
+    Stop = 0,
+    BooleanTrue = 1,
+    BooleanFalse = 2,
+    Byte = 3,
+    I16 = 4,
+    I32 = 5,
+    I64 = 6,
+    Double = 7,
+    Binary = 8,
+    List = 9,
+    Set = 10,
+    Map = 11,
+    Struct = 12,
+}
+
+impl TryFrom<u8> for FieldType {
+    type Error = ParquetError;
+    fn try_from(value: u8) -> Result<Self> {
+        match value {
+            0 => Ok(Self::Stop),
+            1 => Ok(Self::BooleanTrue),
+            2 => Ok(Self::BooleanFalse),
+            3 => Ok(Self::Byte),
+            4 => Ok(Self::I16),
+            5 => Ok(Self::I32),
+            6 => Ok(Self::I64),
+            7 => Ok(Self::Double),
+            8 => Ok(Self::Binary),
+            9 => Ok(Self::List),
+            10 => Ok(Self::Set),
+            11 => Ok(Self::Map),
+            12 => Ok(Self::Struct),
+            _ => Err(general_err!("Unexpected struct field type {}", value)),
+        }
+    }
+}
+
+impl TryFrom<ElementType> for FieldType {
+    type Error = ParquetError;
+    fn try_from(value: ElementType) -> std::result::Result<Self, Self::Error> {
+        match value {
+            ElementType::Bool => Ok(Self::BooleanTrue),
+            ElementType::Byte => Ok(Self::Byte),
+            ElementType::I16 => Ok(Self::I16),
+            ElementType::I32 => Ok(Self::I32),
+            ElementType::I64 => Ok(Self::I64),
+            ElementType::Double => Ok(Self::Double),
+            ElementType::Binary => Ok(Self::Binary),
+            ElementType::List => Ok(Self::List),
+            ElementType::Struct => Ok(Self::Struct),
+            _ => Err(general_err!("Unexpected list element type {:?}", value)),
+        }
+    }
+}
+
+// Thrift compact protocol types for list elements
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub(crate) enum ElementType {
+    Bool = 2,
+    Byte = 3,
+    I16 = 4,
+    I32 = 5,
+    I64 = 6,
+    Double = 7,
+    Binary = 8,
+    List = 9,
+    Set = 10,
+    Map = 11,
+    Struct = 12,
+}
+
+impl TryFrom<u8> for ElementType {
+    type Error = ParquetError;
+    fn try_from(value: u8) -> Result<Self> {
+        match value {
+            // For historical and compatibility reasons, a reader should be capable of dealing with both cases.
+            // The only valid value in the original spec was 2, but due to a widespread implementation bug
+            // the de facto standard across large parts of the library became 1 instead.
+            // As a result, both values are now allowed.
+            // https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#list-and-set
+            1 | 2 => Ok(Self::Bool),
+            3 => Ok(Self::Byte),
+            4 => Ok(Self::I16),
+            5 => Ok(Self::I32),
+            6 => Ok(Self::I64),
+            7 => Ok(Self::Double),
+            8 => Ok(Self::Binary),
+            9 => Ok(Self::List),
+            10 => Ok(Self::Set),
+            11 => Ok(Self::Map),
+            12 => Ok(Self::Struct),
+            _ => Err(general_err!("Unexpected list/set element type {}", value)),
+        }
+    }
+}
+
+pub(crate) struct FieldIdentifier {
+    pub(crate) field_type: FieldType,
+    pub(crate) id: i16,
+}
+
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub(crate) struct ListIdentifier {
+    pub(crate) element_type: ElementType,
+    pub(crate) size: i32,
+}
+
+/// A more performant implementation of [`TCompactInputProtocol`] that reads a slice
+///
+/// [`TCompactInputProtocol`]: thrift::protocol::TCompactInputProtocol
+pub(crate) struct ThriftCompactInputProtocol<'a> {
+    buf: &'a [u8],
+    // Identifier of the last field deserialized for a struct.
+    last_read_field_id: i16,
+    // Stack of the last read field ids (a new entry is added each time a nested struct is read).
+    read_field_id_stack: Vec<i16>,
+    // Boolean value for a field.
+    // Saved because boolean fields and their value are encoded in a single byte,
+    // and reading the field only occurs after the field id is read.
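+    // For example (per the compact protocol spec), a header byte of 0x11
+    // means "field id = previous + 1, type = BooleanTrue": the boolean's
+    // value travels in the header byte itself, so it is stashed here and the
+    // following read_bool() call returns it without consuming another byte.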
+ pending_read_bool_value: Option, +} + +impl<'b, 'a: 'b> ThriftCompactInputProtocol<'a> { + pub fn new(buf: &'a [u8]) -> Self { + Self { + buf, + last_read_field_id: 0, + read_field_id_stack: Vec::with_capacity(16), + pending_read_bool_value: None, + } + } + + pub fn reset_buffer(&mut self, buf: &'a [u8]) { + self.buf = buf; + self.last_read_field_id = 0; + self.read_field_id_stack.clear(); + self.pending_read_bool_value = None; + } + + pub fn as_slice(&self) -> &'a [u8] { + self.buf + } + + fn read_vlq(&mut self) -> Result { + let mut in_progress = 0; + let mut shift = 0; + loop { + let byte = self.read_byte()?; + in_progress |= ((byte & 0x7F) as u64).wrapping_shl(shift); + shift += 7; + if byte & 0x80 == 0 { + return Ok(in_progress); + } + } + } + + fn read_zig_zag(&mut self) -> Result { + let val = self.read_vlq()?; + Ok((val >> 1) as i64 ^ -((val & 1) as i64)) + } + + fn read_list_set_begin(&mut self) -> Result<(ElementType, i32)> { + let header = self.read_byte()?; + let element_type = ElementType::try_from(header & 0x0f)?; + + let possible_element_count = (header & 0xF0) >> 4; + let element_count = if possible_element_count != 15 { + // high bits set high if count and type encoded separately + possible_element_count as i32 + } else { + self.read_vlq()? as _ + }; + + Ok((element_type, element_count)) + } + + pub(crate) fn read_struct_begin(&mut self) -> Result<()> { + self.read_field_id_stack.push(self.last_read_field_id); + self.last_read_field_id = 0; + Ok(()) + } + + pub(crate) fn read_struct_end(&mut self) -> Result<()> { + self.last_read_field_id = self + .read_field_id_stack + .pop() + .expect("should have previous field ids"); + Ok(()) + } + + pub(crate) fn read_field_begin(&mut self) -> Result { + // we can read at least one byte, which is: + // - the type + // - the field delta and the type + let field_type = self.read_byte()?; + let field_delta = (field_type & 0xf0) >> 4; + let field_type = FieldType::try_from(field_type & 0xf)?; + + match field_type { + FieldType::Stop => Ok(FieldIdentifier { + field_type: FieldType::Stop, + id: 0, + }), + _ => { + // special handling for bools + if field_type == FieldType::BooleanFalse { + self.pending_read_bool_value = Some(false); + } else if field_type == FieldType::BooleanTrue { + self.pending_read_bool_value = Some(true); + } + if field_delta != 0 { + self.last_read_field_id = self + .last_read_field_id + .checked_add(field_delta as i16) + .map_or_else( + || { + Err(general_err!(format!( + "cannot add {} to {}", + field_delta, self.last_read_field_id + ))) + }, + Ok, + )?; + } else { + self.last_read_field_id = self.read_i16()?; + }; + + Ok(FieldIdentifier { + field_type, + id: self.last_read_field_id, + }) + } + } + } + + pub(crate) fn read_bool(&mut self) -> Result { + match self.pending_read_bool_value.take() { + Some(b) => Ok(b), + None => { + let b = self.read_byte()?; + // Previous versions of the thrift specification said to use 0 and 1 inside collections, + // but that differed from existing implementations. + // The specification was updated in https://github.com/apache/thrift/commit/2c29c5665bc442e703480bb0ee60fe925ffe02e8. + // At least the go implementation seems to have followed the previously documented values. + match b { + 0x01 => Ok(true), + 0x00 | 0x02 => Ok(false), + unkn => Err(general_err!(format!("cannot convert {unkn} into bool"))), + } + } + } + } + + pub(crate) fn read_bytes(&mut self) -> Result<&'b [u8]> { + let len = self.read_vlq()? 
as usize; + let ret = self.buf.get(..len).ok_or_else(eof_error)?; + self.buf = &self.buf[len..]; + Ok(ret) + } + + pub(crate) fn read_string(&mut self) -> Result<&'b str> { + let slice = self.read_bytes()?; + Ok(std::str::from_utf8(slice)?) + } + + pub(crate) fn read_i8(&mut self) -> Result { + Ok(self.read_byte()? as _) + } + + pub(crate) fn read_i16(&mut self) -> Result { + Ok(self.read_zig_zag()? as _) + } + + pub(crate) fn read_i32(&mut self) -> Result { + Ok(self.read_zig_zag()? as _) + } + + pub(crate) fn read_i64(&mut self) -> Result { + self.read_zig_zag() + } + + pub(crate) fn read_double(&mut self) -> Result { + let slice = self.buf.get(..8).ok_or_else(eof_error)?; + self.buf = &self.buf[8..]; + match slice.try_into() { + Ok(slice) => Ok(f64::from_le_bytes(slice)), + Err(_) => Err(general_err!("Unexpected error converting slice")), + } + } + + pub(crate) fn read_list_begin(&mut self) -> Result { + let (element_type, element_count) = self.read_list_set_begin()?; + Ok(ListIdentifier { + element_type, + size: element_count, + }) + } + + pub(crate) fn read_list_end(&mut self) -> Result<()> { + Ok(()) + } + + #[inline] + fn read_byte(&mut self) -> Result { + let ret = *self.buf.first().ok_or_else(eof_error)?; + self.buf = &self.buf[1..]; + Ok(ret) + } + + #[inline] + fn skip_bytes(&mut self, n: usize) -> Result<()> { + self.buf.get(..n).ok_or_else(eof_error)?; + self.buf = &self.buf[n..]; + Ok(()) + } + + fn skip_vlq(&mut self) -> Result<()> { + loop { + let byte = self.read_byte()?; + if byte & 0x80 == 0 { + return Ok(()); + } + } + } + + /// Skip a field with type `field_type` recursively until the default + /// maximum skip depth is reached. + pub(crate) fn skip(&mut self, field_type: FieldType) -> Result<()> { + // TODO: magic number + self.skip_till_depth(field_type, 64) + } + + /// Empty structs in unions consist of a single byte of 0 for the field stop record. + /// This skips that byte without pushing to the field id stack. + pub(crate) fn skip_empty_struct(&mut self) -> Result<()> { + let b = self.read_byte()?; + if b != 0 { + Err(general_err!("Empty struct has fields")) + } else { + Ok(()) + } + } + + /// Skip a field with type `field_type` recursively up to `depth` levels. + fn skip_till_depth(&mut self, field_type: FieldType, depth: i8) -> Result<()> { + if depth == 0 { + return Err(general_err!(format!("cannot parse past {:?}", field_type))); + } + + match field_type { + FieldType::BooleanFalse | FieldType::BooleanTrue => self.read_bool().map(|_| ()), + FieldType::Byte => self.read_i8().map(|_| ()), + FieldType::I16 => self.skip_vlq().map(|_| ()), + FieldType::I32 => self.skip_vlq().map(|_| ()), + FieldType::I64 => self.skip_vlq().map(|_| ()), + FieldType::Double => self.skip_bytes(8).map(|_| ()), + FieldType::Binary => { + let len = self.read_vlq()? 
as usize; + self.skip_bytes(len) + } + FieldType::Struct => { + self.read_struct_begin()?; + loop { + let field_ident = self.read_field_begin()?; + if field_ident.field_type == FieldType::Stop { + break; + } + self.skip_till_depth(field_ident.field_type, depth - 1)?; + } + self.read_struct_end() + } + FieldType::List => { + let list_ident = self.read_list_begin()?; + for _ in 0..list_ident.size { + let element_type = FieldType::try_from(list_ident.element_type)?; + self.skip_till_depth(element_type, depth - 1)?; + } + self.read_list_end() + } + // no list or map types in parquet format + u => Err(general_err!(format!("cannot skip field type {:?}", &u))), + } + } +} + +fn eof_error() -> ParquetError { + eof_err!("Unexpected EOF") +} diff --git a/parquet/src/schema/printer.rs b/parquet/src/schema/printer.rs index 4190f9717dcd..fd28e13d2edf 100644 --- a/parquet/src/schema/printer.rs +++ b/parquet/src/schema/printer.rs @@ -326,10 +326,15 @@ fn print_logical_and_converted( LogicalType::List => "LIST".to_string(), LogicalType::Map => "MAP".to_string(), LogicalType::Float16 => "FLOAT16".to_string(), - LogicalType::Variant => "VARIANT".to_string(), - LogicalType::Geometry => "GEOMETRY".to_string(), - LogicalType::Geography => "GEOGRAPHY".to_string(), + LogicalType::Variant { + specification_version, + } => format!("VARIANT({specification_version:?})"), + LogicalType::Geometry { crs } => format!("GEOMETRY({crs:?})"), + LogicalType::Geography { crs, algorithm } => { + format!("GEOGRAPHY({crs:?},{algorithm:?})") + } LogicalType::Unknown => "UNKNOWN".to_string(), + LogicalType::_Unknown { field_id } => format!("_Unknown({field_id})"), }, None => { // Also print converted type if it is available diff --git a/parquet/tests/arrow_reader/bad_data.rs b/parquet/tests/arrow_reader/bad_data.rs index ba50e738f6cf..b340e97a3c43 100644 --- a/parquet/tests/arrow_reader/bad_data.rs +++ b/parquet/tests/arrow_reader/bad_data.rs @@ -82,7 +82,7 @@ fn test_parquet_1481() { let err = read_file("PARQUET-1481.parquet").unwrap_err(); assert_eq!( err.to_string(), - "Parquet error: unexpected parquet type: -7" + "Parquet error: Unexpected parquet Type: -7" ); } From f315dbe1e32b7cc68353c2fcc3aa93d5c9099e68 Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Fri, 15 Aug 2025 08:09:01 -0700 Subject: [PATCH 03/15] [thrift-remodel] Complete decoding of `FileMetaData` and `RowGroupMetaData` (#8111) # Which issue does this PR close? **Note: this targets a feature branch, not main** - Part of #5854. # Rationale for this change # What changes are included in this PR? This PR completes reading of the `FileMetaData` and `RowGroupMetaData` pieces of the `ParquetMetaData`. Column indexes and encryption will be follow-on work. This replaces the macro for generating structs with a more general one that can take visibility and lifetime specifiers. Also (temporarily) adds a new function `ParquetMetaDataReader::decode_file_metadata` which should be a drop-in replacement for `ParquetMetaDataReader::decode_metadata`. Still todo: 1. Add some tests that verify this produces the same output as `ParquetMetaDataReader::decode_metadata` 2. Read column indexes with new decoder 3. Read page headers with new decoder 4. Integrate with @alamb's push decoder work #8080 # Are these changes tested? Not yet # Are there any user-facing changes? 
Yes --- parquet/benches/metadata.rs | 31 +- parquet/src/basic.rs | 77 +--- parquet/src/encryption/decrypt.rs | 4 +- parquet/src/encryption/encrypt.rs | 4 +- parquet/src/file/column_crypto_metadata.rs | 46 +- parquet/src/file/metadata/mod.rs | 49 +- parquet/src/file/metadata/reader.rs | 49 +- parquet/src/file/metadata/thrift_gen.rs | 509 +++++++++++++++++++++ parquet/src/file/page_encoding_stats.rs | 17 +- parquet/src/parquet_macros.rs | 193 +++++--- parquet/src/parquet_thrift.rs | 121 ++++- parquet/src/schema/types.rs | 178 ++++++- 12 files changed, 1081 insertions(+), 197 deletions(-) create mode 100644 parquet/src/file/metadata/thrift_gen.rs diff --git a/parquet/benches/metadata.rs b/parquet/benches/metadata.rs index bb2eca0a75c1..3c293462a157 100644 --- a/parquet/benches/metadata.rs +++ b/parquet/benches/metadata.rs @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +use parquet::file::metadata::ParquetMetaDataReader; use rand::Rng; use thrift::protocol::TCompactOutputProtocol; @@ -198,19 +199,43 @@ fn criterion_benchmark(c: &mut Criterion) { }); let meta_data = get_footer_bytes(data.clone()); - c.bench_function("decode file metadata", |b| { + c.bench_function("decode parquet metadata", |b| { + b.iter(|| { + ParquetMetaDataReader::decode_metadata(&meta_data).unwrap(); + }) + }); + + c.bench_function("decode thrift file metadata", |b| { b.iter(|| { parquet::thrift::bench_file_metadata(&meta_data); }) }); - let buf = black_box(encoded_meta()).into(); - c.bench_function("decode file metadata (wide)", |b| { + c.bench_function("decode parquet metadata new", |b| { + b.iter(|| { + ParquetMetaDataReader::decode_file_metadata(&meta_data).unwrap(); + }) + }); + + let buf: Bytes = black_box(encoded_meta()).into(); + c.bench_function("decode parquet metadata (wide)", |b| { + b.iter(|| { + ParquetMetaDataReader::decode_metadata(&buf).unwrap(); + }) + }); + + c.bench_function("decode thrift file metadata (wide)", |b| { b.iter(|| { parquet::thrift::bench_file_metadata(&buf); }) }); + c.bench_function("decode parquet metadata new (wide)", |b| { + b.iter(|| { + ParquetMetaDataReader::decode_file_metadata(&buf).unwrap(); + }) + }); + // rewrite file with page statistics. then read page headers. #[cfg(feature = "arrow")] let (file_bytes, metadata) = rewrite_file(data.clone()); diff --git a/parquet/src/basic.rs b/parquet/src/basic.rs index 92d70a933a03..78d294acd577 100644 --- a/parquet/src/basic.rs +++ b/parquet/src/basic.rs @@ -25,7 +25,7 @@ use std::{fmt, str}; pub use crate::compression::{BrotliLevel, GzipLevel, ZstdLevel}; use crate::parquet_thrift::{FieldType, ThriftCompactInputProtocol}; -use crate::{thrift_enum, thrift_private_struct, thrift_union_all_empty}; +use crate::{thrift_enum, thrift_struct, thrift_union_all_empty}; use crate::errors::{ParquetError, Result}; @@ -210,14 +210,14 @@ union TimeUnit { // private structs for decoding logical type -thrift_private_struct!( +thrift_struct!( struct DecimalType { 1: required i32 scale 2: required i32 precision } ); -thrift_private_struct!( +thrift_struct!( struct TimestampType { 1: required bool is_adjusted_to_u_t_c 2: required TimeUnit unit @@ -227,14 +227,14 @@ struct TimestampType { // they are identical use TimestampType as TimeType; -thrift_private_struct!( +thrift_struct!( struct IntType { 1: required i8 bit_width 2: required bool is_signed } ); -thrift_private_struct!( +thrift_struct!( struct VariantType { // The version of the variant specification that the variant was // written with. 
@@ -242,69 +242,18 @@ struct VariantType { } ); -// TODO need macro for structs that need lifetime annotation +thrift_struct!( struct GeometryType<'a> { - crs: Option<&'a str>, -} - -impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for GeometryType<'a> { - type Error = ParquetError; - fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { - let mut crs: Option<&str> = None; - prot.read_struct_begin()?; - loop { - let field_ident = prot.read_field_begin()?; - if field_ident.field_type == FieldType::Stop { - break; - } - match field_ident.id { - 1 => { - let val = prot.read_string()?; - crs = Some(val); - } - _ => { - prot.skip(field_ident.field_type)?; - } - }; - } - Ok(Self { crs }) - } + 1: optional string<'a> crs; } +); +thrift_struct!( struct GeographyType<'a> { - crs: Option<&'a str>, - algorithm: Option, -} - -impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for GeographyType<'a> { - type Error = ParquetError; - fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { - let mut crs: Option<&str> = None; - let mut algorithm: Option = None; - prot.read_struct_begin()?; - loop { - let field_ident = prot.read_field_begin()?; - if field_ident.field_type == FieldType::Stop { - break; - } - match field_ident.id { - 1 => { - let val = prot.read_string()?; - crs = Some(val); - } - 2 => { - let val = EdgeInterpolationAlgorithm::try_from(&mut *prot)?; - algorithm = Some(val); - } - - _ => { - prot.skip(field_ident.field_type)?; - } - }; - } - Ok(Self { crs, algorithm }) - } + 1: optional string<'a> crs; + 2: optional EdgeInterpolationAlgorithm algorithm; } +); /// Logical types used by version 2.4.0+ of the Parquet format. /// @@ -971,7 +920,7 @@ impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for ColumnOrder { } let ret = match field_ident.id { 1 => { - // TODO: the sort order needs to be set correctly after parsing. + // NOTE: the sort order needs to be set correctly after parsing. prot.skip_empty_struct()?; Self::TYPE_DEFINED_ORDER(SortOrder::SIGNED) } diff --git a/parquet/src/encryption/decrypt.rs b/parquet/src/encryption/decrypt.rs index d9b9ff0326b4..d285f6a1237c 100644 --- a/parquet/src/encryption/decrypt.rs +++ b/parquet/src/encryption/decrypt.rs @@ -142,13 +142,13 @@ impl CryptoContext { column_ordinal: usize, ) -> Result { let (data_decryptor, metadata_decryptor) = match column_crypto_metadata { - ColumnCryptoMetaData::EncryptionWithFooterKey => { + ColumnCryptoMetaData::ENCRYPTION_WITH_FOOTER_KEY => { // TODO: In GCM-CTR mode will this need to be a non-GCM decryptor? 
let data_decryptor = file_decryptor.get_footer_decryptor()?; let metadata_decryptor = file_decryptor.get_footer_decryptor()?; (data_decryptor, metadata_decryptor) } - ColumnCryptoMetaData::EncryptionWithColumnKey(column_key_encryption) => { + ColumnCryptoMetaData::ENCRYPTION_WITH_COLUMN_KEY(column_key_encryption) => { let key_metadata = &column_key_encryption.key_metadata; let full_column_name; let column_name = if column_key_encryption.path_in_schema.len() == 1 { diff --git a/parquet/src/encryption/encrypt.rs b/parquet/src/encryption/encrypt.rs index c8d3ffc0eef4..1a241bf7b170 100644 --- a/parquet/src/encryption/encrypt.rs +++ b/parquet/src/encryption/encrypt.rs @@ -421,14 +421,14 @@ pub(crate) fn get_column_crypto_metadata( ) -> Option { if properties.column_keys.is_empty() { // Uniform encryption - Some(ColumnCryptoMetaData::EncryptionWithFooterKey) + Some(ColumnCryptoMetaData::ENCRYPTION_WITH_FOOTER_KEY) } else { properties .column_keys .get(&column.path().string()) .map(|encryption_key| { // Column is encrypted with a column specific key - ColumnCryptoMetaData::EncryptionWithColumnKey(EncryptionWithColumnKey { + ColumnCryptoMetaData::ENCRYPTION_WITH_COLUMN_KEY(EncryptionWithColumnKey { path_in_schema: column.path().parts().to_vec(), key_metadata: encryption_key.key_metadata.clone(), }) diff --git a/parquet/src/file/column_crypto_metadata.rs b/parquet/src/file/column_crypto_metadata.rs index af670e675fcd..95cbc65cf716 100644 --- a/parquet/src/file/column_crypto_metadata.rs +++ b/parquet/src/file/column_crypto_metadata.rs @@ -17,30 +17,36 @@ //! Column chunk encryption metadata -use crate::errors::Result; +use crate::errors::{ParquetError, Result}; use crate::format::{ ColumnCryptoMetaData as TColumnCryptoMetaData, EncryptionWithColumnKey as TEncryptionWithColumnKey, EncryptionWithFooterKey as TEncryptionWithFooterKey, }; +use crate::parquet_thrift::{FieldType, ThriftCompactInputProtocol}; +use crate::{thrift_struct, thrift_union}; -/// ColumnCryptoMetadata for a column chunk -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum ColumnCryptoMetaData { - /// The column is encrypted with the footer key - EncryptionWithFooterKey, - /// The column is encrypted with a column-specific key - EncryptionWithColumnKey(EncryptionWithColumnKey), -} +// define this and ColumnCryptoMetadata here so they're only defined when +// the encryption feature is enabled +thrift_struct!( /// Encryption metadata for a column chunk encrypted with a column-specific key -#[derive(Clone, Debug, PartialEq, Eq)] pub struct EncryptionWithColumnKey { - /// Path to the column in the Parquet schema - pub path_in_schema: Vec, - /// Metadata required to retrieve the column encryption key - pub key_metadata: Option>, + /// Path to the column in the Parquet schema + 1: required list path_in_schema + + /// Path to the column in the Parquet schema + 2: optional binary key_metadata +} +); + +thrift_union!( +/// ColumnCryptoMetadata for a column chunk +union ColumnCryptoMetaData { + 1: ENCRYPTION_WITH_FOOTER_KEY + 2: (EncryptionWithColumnKey) ENCRYPTION_WITH_COLUMN_KEY } +); /// Converts Thrift definition into `ColumnCryptoMetadata`. 
pub fn try_from_thrift( @@ -48,10 +54,10 @@ pub fn try_from_thrift( ) -> Result { let crypto_metadata = match thrift_column_crypto_metadata { TColumnCryptoMetaData::ENCRYPTIONWITHFOOTERKEY(_) => { - ColumnCryptoMetaData::EncryptionWithFooterKey + ColumnCryptoMetaData::ENCRYPTION_WITH_FOOTER_KEY } TColumnCryptoMetaData::ENCRYPTIONWITHCOLUMNKEY(encryption_with_column_key) => { - ColumnCryptoMetaData::EncryptionWithColumnKey(EncryptionWithColumnKey { + ColumnCryptoMetaData::ENCRYPTION_WITH_COLUMN_KEY(EncryptionWithColumnKey { path_in_schema: encryption_with_column_key.path_in_schema.clone(), key_metadata: encryption_with_column_key.key_metadata.clone(), }) @@ -63,10 +69,10 @@ pub fn try_from_thrift( /// Converts `ColumnCryptoMetadata` into Thrift definition. pub fn to_thrift(column_crypto_metadata: &ColumnCryptoMetaData) -> TColumnCryptoMetaData { match column_crypto_metadata { - ColumnCryptoMetaData::EncryptionWithFooterKey => { + ColumnCryptoMetaData::ENCRYPTION_WITH_FOOTER_KEY => { TColumnCryptoMetaData::ENCRYPTIONWITHFOOTERKEY(TEncryptionWithFooterKey {}) } - ColumnCryptoMetaData::EncryptionWithColumnKey(encryption_with_column_key) => { + ColumnCryptoMetaData::ENCRYPTION_WITH_COLUMN_KEY(encryption_with_column_key) => { TColumnCryptoMetaData::ENCRYPTIONWITHCOLUMNKEY(TEncryptionWithColumnKey { path_in_schema: encryption_with_column_key.path_in_schema.clone(), key_metadata: encryption_with_column_key.key_metadata.clone(), @@ -81,14 +87,14 @@ mod tests { #[test] fn test_encryption_with_footer_key_from_thrift() { - let metadata = ColumnCryptoMetaData::EncryptionWithFooterKey; + let metadata = ColumnCryptoMetaData::ENCRYPTION_WITH_FOOTER_KEY; assert_eq!(try_from_thrift(&to_thrift(&metadata)).unwrap(), metadata); } #[test] fn test_encryption_with_column_key_from_thrift() { - let metadata = ColumnCryptoMetaData::EncryptionWithColumnKey(EncryptionWithColumnKey { + let metadata = ColumnCryptoMetaData::ENCRYPTION_WITH_COLUMN_KEY(EncryptionWithColumnKey { path_in_schema: vec!["abc".to_owned(), "def".to_owned()], key_metadata: Some(vec![0, 1, 2, 3, 4, 5]), }); diff --git a/parquet/src/file/metadata/mod.rs b/parquet/src/file/metadata/mod.rs index 193b70d9dd4a..d8102761186c 100644 --- a/parquet/src/file/metadata/mod.rs +++ b/parquet/src/file/metadata/mod.rs @@ -93,9 +93,9 @@ //! ``` mod memory; pub(crate) mod reader; +pub(crate) mod thrift_gen; mod writer; -use crate::basic::{ColumnOrder, Compression, Encoding, Type}; #[cfg(feature = "encryption")] use crate::encryption::{ decrypt::FileDecryptor, @@ -104,7 +104,6 @@ use crate::encryption::{ #[cfg(feature = "encryption")] use crate::file::column_crypto_metadata::{self, ColumnCryptoMetaData}; pub(crate) use crate::file::metadata::memory::HeapSize; -use crate::file::page_index::index::{Index, NativeIndex}; use crate::file::{ page_encoding_stats::{self, PageEncodingStats}, page_index::offset_index::PageLocation, @@ -124,9 +123,17 @@ use crate::{ basic::BoundaryOrder, errors::{ParquetError, Result}, }; +use crate::{ + basic::{ColumnOrder, Compression, Encoding, Type}, + parquet_thrift::{FieldType, ThriftCompactInputProtocol}, +}; use crate::{ data_type::private::ParquetValueType, file::page_index::offset_index::OffsetIndexMetaData, }; +use crate::{ + file::page_index::index::{Index, NativeIndex}, + thrift_struct, +}; pub use reader::{FooterTail, ParquetMetaDataReader}; use std::ops::Range; use std::sync::Arc; @@ -423,14 +430,14 @@ impl From for ParquetMetaDataBuilder { } } +// TODO: should this move to thrift_gen? 
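+// As an illustrative sketch (not the literal expansion), the invocation below
+// generates roughly:
+//   pub struct KeyValue { pub key: String, pub value: Option<String> }
+// plus a `TryFrom<&mut ThriftCompactInputProtocol<'_>>` impl that loops over
+// field ids 1 and 2 and skips any unknown fields.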
+thrift_struct!( /// A key-value pair for [`FileMetaData`]. -#[derive(Clone, Debug, Eq, PartialEq)] pub struct KeyValue { - /// The key. - pub key: String, - /// An optional value. - pub value: Option, + 1: required string key + 2: optional string value } +); impl KeyValue { /// Create a new key value pair @@ -546,17 +553,21 @@ impl FileMetaData { } } +// TODO: should this move to thrift_gen? +thrift_struct!( /// Sort order within a RowGroup of a leaf column -#[derive(Clone, Debug, Eq, PartialEq)] pub struct SortingColumn { - /// The ordinal position of the column (in this row group) * - pub column_idx: i32, - /// If true, indicates this column is sorted in descending order. * - pub descending: bool, - /// If true, nulls will come before non-null values, otherwise, - /// nulls go at the end. - pub nulls_first: bool, + /// The ordinal position of the column (in this row group) + 1: required i32 column_idx + + /// If true, indicates this column is sorted in descending order. + 2: required bool descending + + /// If true, nulls will come before non-null values, otherwise, + /// nulls go at the end. */ + 3: required bool nulls_first } +); impl From<&crate::format::SortingColumn> for SortingColumn { fn from(value: &crate::format::SortingColumn) -> Self { @@ -2203,9 +2214,9 @@ mod tests { .build(); #[cfg(not(feature = "encryption"))] - let base_expected_size = 2312; + let base_expected_size = 2280; #[cfg(feature = "encryption")] - let base_expected_size = 2648; + let base_expected_size = 2616; assert_eq!(parquet_meta.memory_size(), base_expected_size); @@ -2233,9 +2244,9 @@ mod tests { .build(); #[cfg(not(feature = "encryption"))] - let bigger_expected_size = 2816; + let bigger_expected_size = 2784; #[cfg(feature = "encryption")] - let bigger_expected_size = 3152; + let bigger_expected_size = 3120; // more set fields means more memory usage assert!(bigger_expected_size > base_expected_size); diff --git a/parquet/src/file/metadata/reader.rs b/parquet/src/file/metadata/reader.rs index 53ae01221976..4905469737a0 100644 --- a/parquet/src/file/metadata/reader.rs +++ b/parquet/src/file/metadata/reader.rs @@ -17,30 +17,33 @@ use std::{io::Read, ops::Range, sync::Arc}; +use crate::{ + basic::ColumnOrder, + file::metadata::{FileMetaData, KeyValue}, + parquet_thrift::ThriftCompactInputProtocol, +}; #[cfg(feature = "encryption")] -use crate::encryption::{ - decrypt::{FileDecryptionProperties, FileDecryptor}, - modules::create_footer_aad, +use crate::{ + encryption::{ + decrypt::{CryptoContext, FileDecryptionProperties, FileDecryptor}, + modules::create_footer_aad, + }, + format::{EncryptionAlgorithm, FileCryptoMetaData as TFileCryptoMetaData}, }; -use crate::{basic::ColumnOrder, file::metadata::KeyValue}; use bytes::Bytes; use crate::errors::{ParquetError, Result}; -use crate::file::metadata::{ColumnChunkMetaData, FileMetaData, ParquetMetaData, RowGroupMetaData}; +use crate::file::metadata::{ColumnChunkMetaData, ParquetMetaData, RowGroupMetaData}; use crate::file::page_index::index::Index; use crate::file::page_index::index_reader::{acc_range, decode_column_index, decode_offset_index}; use crate::file::reader::ChunkReader; use crate::file::{FOOTER_SIZE, PARQUET_MAGIC, PARQUET_MAGIC_ENCR_FOOTER}; -#[cfg(feature = "encryption")] -use crate::format::{EncryptionAlgorithm, FileCryptoMetaData as TFileCryptoMetaData}; use crate::schema::types; use crate::schema::types::SchemaDescriptor; use crate::thrift::{TCompactSliceInputProtocol, TSerializable}; #[cfg(all(feature = "async", feature = "arrow"))] use 
crate::arrow::async_reader::{MetadataFetch, MetadataSuffixFetch}; -#[cfg(feature = "encryption")] -use crate::encryption::decrypt::CryptoContext; use crate::file::page_index::offset_index::OffsetIndexMetaData; /// Reads the [`ParquetMetaData`] from a byte stream. @@ -1040,6 +1043,12 @@ impl ParquetMetaDataReader { Ok(ParquetMetaData::new(file_metadata, row_groups)) } + /// create meta data from thrift encoded bytes + pub fn decode_file_metadata(buf: &[u8]) -> Result { + let mut prot = ThriftCompactInputProtocol::new(buf); + ParquetMetaData::try_from(&mut prot) + } + /// Parses column orders from Thrift definition. /// If no column orders are defined, returns `None`. fn parse_column_orders( @@ -1106,6 +1115,7 @@ fn get_file_decryptor( mod tests { use super::*; use bytes::Bytes; + use zstd::zstd_safe::WriteBuf; use crate::basic::SortOrder; use crate::basic::Type; @@ -1309,6 +1319,27 @@ mod tests { "EOF: Parquet file too small. Size is 1728 but need 1729" ); } + + #[test] + fn test_new_decoder() { + let file = get_test_file("alltypes_tiny_pages.parquet"); + let len = file.len(); + + // read entire file + let bytes = file.get_bytes(0, len as usize).unwrap(); + let mut footer = [0u8; FOOTER_SIZE]; + footer.copy_from_slice(bytes.slice(len as usize - FOOTER_SIZE..).as_slice()); + let tail = ParquetMetaDataReader::decode_footer_tail(&footer).unwrap(); + let meta_len = tail.metadata_length(); + let metadata_bytes = bytes.slice(len as usize - FOOTER_SIZE - meta_len..); + + // get ParquetMetaData + let m = ParquetMetaDataReader::decode_file_metadata(&metadata_bytes).unwrap(); + let m2 = ParquetMetaDataReader::decode_metadata(&metadata_bytes).unwrap(); + + // check that metadatas are equivalent + assert_eq!(m, m2); + } } #[cfg(all(feature = "async", feature = "arrow", test))] diff --git a/parquet/src/file/metadata/thrift_gen.rs b/parquet/src/file/metadata/thrift_gen.rs new file mode 100644 index 000000000000..3888d247df1c --- /dev/null +++ b/parquet/src/file/metadata/thrift_gen.rs @@ -0,0 +1,509 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +// a collection of generated structs used to parse thrift metadata + +use std::sync::Arc; + +#[cfg(feature = "encryption")] +use crate::file::column_crypto_metadata::ColumnCryptoMetaData; +use crate::{ + basic::{ColumnOrder, Compression, ConvertedType, Encoding, LogicalType, Repetition, Type}, + data_type::{ByteArray, FixedLenByteArray, Int96}, + errors::{ParquetError, Result}, + file::{ + metadata::{ + ColumnChunkMetaData, KeyValue, LevelHistogram, ParquetMetaData, RowGroupMetaData, + SortingColumn, + }, + page_encoding_stats::PageEncodingStats, + statistics::ValueStatistics, + }, + parquet_thrift::{FieldType, ThriftCompactInputProtocol}, + schema::types::{parquet_schema_from_array, ColumnDescriptor, SchemaDescriptor}, + thrift_struct, + util::bit_util::FromBytes, +}; + +// this needs to be visible to the schema conversion code +thrift_struct!( +pub(crate) struct SchemaElement<'a> { + /** Data type for this field. Not set if the current element is a non-leaf node */ + 1: optional Type type_; + 2: optional i32 type_length; + 3: optional Repetition repetition_type; + 4: required string<'a> name; + 5: optional i32 num_children; + 6: optional ConvertedType converted_type; + 7: optional i32 scale + 8: optional i32 precision + 9: optional i32 field_id; + 10: optional LogicalType logical_type +} +); + +// the following are only used internally so are private +thrift_struct!( +struct FileMetaData<'a> { + /** Version of this file **/ + 1: required i32 version + 2: required list<'a> schema; + 3: required i64 num_rows + 4: required list<'a> row_groups + 5: optional list key_value_metadata + 6: optional string created_by + 7: optional list column_orders; + //8: optional EncryptionAlgorithm encryption_algorithm + //9: optional binary footer_signing_key_metadata +} +); + +thrift_struct!( +struct RowGroup<'a> { + 1: required list<'a> columns + 2: required i64 total_byte_size + 3: required i64 num_rows + 4: optional list sorting_columns + 5: optional i64 file_offset + // we don't expose total_compressed_size so skip + //6: optional i64 total_compressed_size + 7: optional i16 ordinal +} +); + +#[cfg(feature = "encryption")] +thrift_struct!( +struct ColumnChunk<'a> { + 1: optional string<'a> file_path + 2: required i64 file_offset = 0 + 3: optional ColumnMetaData<'a> meta_data + 4: optional i64 offset_index_offset + 5: optional i32 offset_index_length + 6: optional i64 column_index_offset + 7: optional i32 column_index_length + 8: optional ColumnCryptoMetaData crypto_metadata + 9: optional binary<'a> encrypted_column_metadata +} +); +#[cfg(not(feature = "encryption"))] +thrift_struct!( +struct ColumnChunk<'a> { + 1: optional string file_path + 2: required i64 file_offset = 0 + 3: optional ColumnMetaData<'a> meta_data + 4: optional i64 offset_index_offset + 5: optional i32 offset_index_length + 6: optional i64 column_index_offset + 7: optional i32 column_index_length +} +); + +type CompressionCodec = Compression; +thrift_struct!( +struct ColumnMetaData<'a> { + 1: required Type type_ + 2: required list encodings + // we don't expose path_in_schema so skip + //3: required list path_in_schema + 4: required CompressionCodec codec + 5: required i64 num_values + 6: required i64 total_uncompressed_size + 7: required i64 total_compressed_size + // we don't expose key_value_metadata so skip + //8: optional list key_value_metadata + 9: required i64 data_page_offset + 10: optional i64 index_page_offset + 11: optional i64 dictionary_page_offset + 12: optional Statistics<'a> statistics + 13: optional list 
encoding_stats; + 14: optional i64 bloom_filter_offset; + 15: optional i32 bloom_filter_length; + 16: optional SizeStatistics size_statistics; + 17: optional GeospatialStatistics geospatial_statistics; +} +); + +thrift_struct!( +struct BoundingBox { + 1: required double xmin; + 2: required double xmax; + 3: required double ymin; + 4: required double ymax; + 5: optional double zmin; + 6: optional double zmax; + 7: optional double mmin; + 8: optional double mmax; +} +); + +thrift_struct!( +struct GeospatialStatistics { + /** A bounding box of geospatial instances */ + 1: optional BoundingBox bbox; + /** Geospatial type codes of all instances, or an empty list if not known */ + 2: optional list geospatial_types; +} +); + +thrift_struct!( +struct SizeStatistics { + 1: optional i64 unencoded_byte_array_data_bytes; + 2: optional list repetition_level_histogram; + 3: optional list definition_level_histogram; +} +); + +thrift_struct!( +struct Statistics<'a> { + 1: optional binary<'a> max; + 2: optional binary<'a> min; + 3: optional i64 null_count; + 4: optional i64 distinct_count; + 5: optional binary<'a> max_value; + 6: optional binary<'a> min_value; + 7: optional bool is_max_value_exact; + 8: optional bool is_min_value_exact; +} +); + +// convert collection of thrift RowGroups into RowGroupMetaData +fn convert_row_groups( + mut row_groups: Vec, + schema_descr: Arc, +) -> Result> { + let mut res: Vec = Vec::with_capacity(row_groups.len()); + for rg in row_groups.drain(0..) { + res.push(convert_row_group(rg, schema_descr.clone())?); + } + + Ok(res) +} + +fn convert_row_group( + row_group: RowGroup, + schema_descr: Arc, +) -> Result { + let num_rows = row_group.num_rows; + let sorting_columns = row_group.sorting_columns; + let total_byte_size = row_group.total_byte_size; + let file_offset = row_group.file_offset; + let ordinal = row_group.ordinal; + + let columns = convert_columns(row_group.columns, schema_descr.clone())?; + + Ok(RowGroupMetaData { + columns, + num_rows, + sorting_columns, + total_byte_size, + schema_descr, + file_offset, + ordinal, + }) +} + +fn convert_columns( + mut columns: Vec, + schema_descr: Arc, +) -> Result> { + let mut res: Vec = Vec::with_capacity(columns.len()); + for (c, d) in columns.drain(0..).zip(schema_descr.columns()) { + res.push(convert_column(c, d.clone())?); + } + + Ok(res) +} + +fn convert_column( + column: ColumnChunk, + column_descr: Arc, +) -> Result { + if column.meta_data.is_none() { + return Err(general_err!("Expected to have column metadata")); + } + let col_metadata = column.meta_data.unwrap(); + let column_type = col_metadata.type_; + let encodings = col_metadata.encodings; + let compression = col_metadata.codec; + let file_path = column.file_path.map(|v| v.to_owned()); + let file_offset = column.file_offset; + let num_values = col_metadata.num_values; + let total_compressed_size = col_metadata.total_compressed_size; + let total_uncompressed_size = col_metadata.total_uncompressed_size; + let data_page_offset = col_metadata.data_page_offset; + let index_page_offset = col_metadata.index_page_offset; + let dictionary_page_offset = col_metadata.dictionary_page_offset; + let statistics = convert_stats(column_type, col_metadata.statistics)?; + let encoding_stats = col_metadata.encoding_stats; + let bloom_filter_offset = col_metadata.bloom_filter_offset; + let bloom_filter_length = col_metadata.bloom_filter_length; + let offset_index_offset = column.offset_index_offset; + let offset_index_length = column.offset_index_length; + let column_index_offset = 
column.column_index_offset; + let column_index_length = column.column_index_length; + let (unencoded_byte_array_data_bytes, repetition_level_histogram, definition_level_histogram) = + if let Some(size_stats) = col_metadata.size_statistics { + ( + size_stats.unencoded_byte_array_data_bytes, + size_stats.repetition_level_histogram, + size_stats.definition_level_histogram, + ) + } else { + (None, None, None) + }; + + let repetition_level_histogram = repetition_level_histogram.map(LevelHistogram::from); + let definition_level_histogram = definition_level_histogram.map(LevelHistogram::from); + + // FIXME: need column crypto + + let result = ColumnChunkMetaData { + column_descr, + encodings, + file_path, + file_offset, + num_values, + compression, + total_compressed_size, + total_uncompressed_size, + data_page_offset, + index_page_offset, + dictionary_page_offset, + statistics, + encoding_stats, + bloom_filter_offset, + bloom_filter_length, + offset_index_offset, + offset_index_length, + column_index_offset, + column_index_length, + unencoded_byte_array_data_bytes, + repetition_level_histogram, + definition_level_histogram, + #[cfg(feature = "encryption")] + column_crypto_metadata: column.crypto_metadata, + }; + Ok(result) +} + +fn convert_stats( + physical_type: Type, + thrift_stats: Option, +) -> Result> { + use crate::file::statistics::Statistics as FStatistics; + Ok(match thrift_stats { + Some(stats) => { + // Number of nulls recorded, when it is not available, we just mark it as 0. + // TODO this should be `None` if there is no information about NULLS. + // see https://github.com/apache/arrow-rs/pull/6216/files + let null_count = stats.null_count.unwrap_or(0); + + if null_count < 0 { + return Err(ParquetError::General(format!( + "Statistics null count is negative {null_count}", + ))); + } + + // Generic null count. + let null_count = Some(null_count as u64); + // Generic distinct count (count of distinct values occurring) + let distinct_count = stats.distinct_count.map(|value| value as u64); + // Whether or not statistics use deprecated min/max fields. + let old_format = stats.min_value.is_none() && stats.max_value.is_none(); + // Generic min value as bytes. + let min = if old_format { + stats.min + } else { + stats.min_value + }; + // Generic max value as bytes. + let max = if old_format { + stats.max + } else { + stats.max_value + }; + + fn check_len(min: &Option<&[u8]>, max: &Option<&[u8]>, len: usize) -> Result<()> { + if let Some(min) = min { + if min.len() < len { + return Err(ParquetError::General( + "Insufficient bytes to parse min statistic".to_string(), + )); + } + } + if let Some(max) = max { + if max.len() < len { + return Err(ParquetError::General( + "Insufficient bytes to parse max statistic".to_string(), + )); + } + } + Ok(()) + } + + match physical_type { + Type::BOOLEAN => check_len(&min, &max, 1), + Type::INT32 | Type::FLOAT => check_len(&min, &max, 4), + Type::INT64 | Type::DOUBLE => check_len(&min, &max, 8), + Type::INT96 => check_len(&min, &max, 12), + _ => Ok(()), + }?; + + // Values are encoded using PLAIN encoding definition, except that + // variable-length byte arrays do not include a length prefix. + // + // Instead of using actual decoder, we manually convert values. 
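+            // For example (assuming PLAIN little-endian encoding), an INT32
+            // min of 7 arrives as the four bytes [0x07, 0x00, 0x00, 0x00] and
+            // is decoded with i32::from_le_bytes, while a BYTE_ARRAY value is
+            // the raw bytes themselves, with no 4-byte length prefix.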
+ let res = match physical_type { + Type::BOOLEAN => FStatistics::boolean( + min.map(|data| data[0] != 0), + max.map(|data| data[0] != 0), + distinct_count, + null_count, + old_format, + ), + Type::INT32 => FStatistics::int32( + min.map(|data| i32::from_le_bytes(data[..4].try_into().unwrap())), + max.map(|data| i32::from_le_bytes(data[..4].try_into().unwrap())), + distinct_count, + null_count, + old_format, + ), + Type::INT64 => FStatistics::int64( + min.map(|data| i64::from_le_bytes(data[..8].try_into().unwrap())), + max.map(|data| i64::from_le_bytes(data[..8].try_into().unwrap())), + distinct_count, + null_count, + old_format, + ), + Type::INT96 => { + // INT96 statistics may not be correct, because comparison is signed + let min = if let Some(data) = min { + assert_eq!(data.len(), 12); + Some(Int96::try_from_le_slice(data)?) + } else { + None + }; + let max = if let Some(data) = max { + assert_eq!(data.len(), 12); + Some(Int96::try_from_le_slice(data)?) + } else { + None + }; + FStatistics::int96(min, max, distinct_count, null_count, old_format) + } + Type::FLOAT => FStatistics::float( + min.map(|data| f32::from_le_bytes(data[..4].try_into().unwrap())), + max.map(|data| f32::from_le_bytes(data[..4].try_into().unwrap())), + distinct_count, + null_count, + old_format, + ), + Type::DOUBLE => FStatistics::double( + min.map(|data| f64::from_le_bytes(data[..8].try_into().unwrap())), + max.map(|data| f64::from_le_bytes(data[..8].try_into().unwrap())), + distinct_count, + null_count, + old_format, + ), + Type::BYTE_ARRAY => FStatistics::ByteArray( + ValueStatistics::new( + min.map(ByteArray::from), + max.map(ByteArray::from), + distinct_count, + null_count, + old_format, + ) + .with_max_is_exact(stats.is_max_value_exact.unwrap_or(false)) + .with_min_is_exact(stats.is_min_value_exact.unwrap_or(false)), + ), + Type::FIXED_LEN_BYTE_ARRAY => FStatistics::FixedLenByteArray( + ValueStatistics::new( + min.map(ByteArray::from).map(FixedLenByteArray::from), + max.map(ByteArray::from).map(FixedLenByteArray::from), + distinct_count, + null_count, + old_format, + ) + .with_max_is_exact(stats.is_max_value_exact.unwrap_or(false)) + .with_min_is_exact(stats.is_min_value_exact.unwrap_or(false)), + ), + }; + + Some(res) + } + None => None, + }) +} + +/// Create ParquetMetaData from thrift input. Note that this only decodes the file metadata in +/// the Parquet footer. Page indexes will need to be added later. 
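+///
+/// A minimal usage sketch (hypothetical caller, mirroring
+/// `ParquetMetaDataReader::decode_file_metadata`):
+///
+/// ```ignore
+/// let mut prot = ThriftCompactInputProtocol::new(metadata_bytes);
+/// let metadata = ParquetMetaData::try_from(&mut prot)?;
+/// ```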
+impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for ParquetMetaData { + type Error = ParquetError; + fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { + let file_meta = super::thrift_gen::FileMetaData::try_from(prot)?; + + let version = file_meta.version; + let num_rows = file_meta.num_rows; + let row_groups = file_meta.row_groups; + let created_by = file_meta.created_by.map(|c| c.to_owned()); + let key_value_metadata = file_meta.key_value_metadata; + + let val = parquet_schema_from_array(file_meta.schema)?; + let schema_descr = Arc::new(SchemaDescriptor::new(val)); + + // need schema_descr to get final RowGroupMetaData + let row_groups = convert_row_groups(row_groups, schema_descr.clone())?; + + // need to map read column orders to actual values based on the schema + if file_meta + .column_orders + .as_ref() + .is_some_and(|cos| cos.len() != schema_descr.num_columns()) + { + return Err(general_err!("Column order length mismatch")); + } + + let column_orders = file_meta.column_orders.map(|cos| { + let mut res = Vec::with_capacity(cos.len()); + for (i, column) in schema_descr.columns().iter().enumerate() { + match cos[i] { + ColumnOrder::TYPE_DEFINED_ORDER(_) => { + let sort_order = ColumnOrder::get_sort_order( + column.logical_type(), + column.converted_type(), + column.physical_type(), + ); + res.push(ColumnOrder::TYPE_DEFINED_ORDER(sort_order)); + } + _ => res.push(cos[i]), + } + } + res + }); + + let fmd = crate::file::metadata::FileMetaData::new( + version, + num_rows, + created_by, + key_value_metadata, + schema_descr, + column_orders, + ); + + Ok(ParquetMetaData::new(fmd, row_groups)) + } +} diff --git a/parquet/src/file/page_encoding_stats.rs b/parquet/src/file/page_encoding_stats.rs index 67ca2a3e4c71..281954d939dd 100644 --- a/parquet/src/file/page_encoding_stats.rs +++ b/parquet/src/file/page_encoding_stats.rs @@ -18,18 +18,19 @@ //! Per-page encoding information. use crate::basic::{Encoding, PageType}; -use crate::errors::Result; +use crate::errors::{ParquetError, Result}; +use crate::parquet_thrift::{FieldType, ThriftCompactInputProtocol}; +use crate::thrift_struct; +// TODO: This should probably all be moved to thrift_gen +thrift_struct!( /// PageEncodingStats for a column chunk and data page. -#[derive(Clone, Debug, PartialEq, Eq)] pub struct PageEncodingStats { - /// the page type (data/dic/...) - pub page_type: PageType, - /// encoding of the page - pub encoding: Encoding, - /// number of pages of this type with this encoding - pub count: i32, + 1: required PageType page_type; + 2: required Encoding encoding; + 3: required i32 count; } +); /// Converts Thrift definition into `PageEncodingStats`. pub fn try_from_thrift( diff --git a/parquet/src/parquet_macros.rs b/parquet/src/parquet_macros.rs index 40e148a75e72..ebd86d861507 100644 --- a/parquet/src/parquet_macros.rs +++ b/parquet/src/parquet_macros.rs @@ -72,37 +72,6 @@ macro_rules! thrift_enum { } } -#[macro_export] -#[allow(clippy::crate_in_macro_def)] -/// macro to generate rust enums for empty thrift structs used in unions -macro_rules! 
thrift_empty_struct { - ($identifier: ident) => { - #[derive(Clone, Copy, Debug, Default, Eq, PartialEq)] - pub struct $identifier {} - - impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for $identifier { - type Error = ParquetError; - fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { - prot.skip_empty_struct()?; - Ok(Self {}) - } - } - - // TODO: remove when we finally get rid of the format module - impl From for $identifier { - fn from(_: $crate::format::$identifier) -> Self { - Self {} - } - } - - impl From<$identifier> for crate::format::$identifier { - fn from(_: $identifier) -> Self { - Self {} - } - } - }; -} - /// macro to generate rust enums for thrift unions where all fields are typed with empty structs #[macro_export] #[allow(clippy::crate_in_macro_def)] @@ -166,22 +135,73 @@ macro_rules! thrift_union_all_empty { } } -/// macro to generate rust structs from a thrift struct definition +/// macro to generate rust enums for thrift unions where all variants are a mix of unit and tuple types. +/// this requires modifying the thrift IDL. For variants with empty structs as their type, +/// delete the typename (i.e. "1: EmptyStruct Var1;" => "1: Var1"). For variants with a non-empty +/// type, put the typename in parens (e.g" "1: Type Var1;" => "1: (Type) Var1;"). #[macro_export] -macro_rules! thrift_private_struct { - ($(#[$($def_attrs:tt)*])* struct $identifier:ident { $($(#[$($field_attrs:tt)*])* $field_id:literal : $required_or_optional:ident $field_type:ident $(< $element_type:ident >)? $field_name:ident $(= $default_value:literal)? $(;)?)* }) => { +#[allow(clippy::crate_in_macro_def)] +macro_rules! thrift_union { + ($(#[$($def_attrs:tt)*])* union $identifier:ident { $($(#[$($field_attrs:tt)*])* $field_id:literal : $( ( $field_type:ident $(< $element_type:ident >)? ) )? 
$field_name:ident $(;)?)* }) => { $(#[cfg_attr(not(doctest), $($def_attrs)*)])* - #[derive(Clone, Debug, PartialEq)] + #[derive(Clone, Debug, Eq, PartialEq)] #[allow(non_camel_case_types)] #[allow(non_snake_case)] - struct $identifier { - $($(#[cfg_attr(not(doctest), $($field_attrs)*)])* pub $field_name: $crate::__thrift_required_or_optional!($required_or_optional $crate::__thrift_field_type!($field_type $($element_type)?))),* + #[allow(missing_docs)] + pub enum $identifier { + $($(#[cfg_attr(not(doctest), $($field_attrs)*)])* $field_name $( ( $crate::__thrift_union_type!{$field_type $($element_type)?} ) )?),* } impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for $identifier { type Error = ParquetError; + + fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { + prot.read_struct_begin()?; + let field_ident = prot.read_field_begin()?; + if field_ident.field_type == FieldType::Stop { + return Err(general_err!("Received empty union from remote {}", stringify!($identifier))); + } + let ret = match field_ident.id { + $($field_id => { + let val = $crate::__thrift_read_variant!(prot, $field_name $($field_type $($element_type)?)?); + val + })* + _ => { + return Err(general_err!("Unexpected {} {}", stringify!($identifier), field_ident.id)); + } + }; + let field_ident = prot.read_field_begin()?; + if field_ident.field_type != FieldType::Stop { + return Err(general_err!( + concat!("Received multiple fields for union from remote {}", stringify!($identifier)) + )); + } + prot.read_struct_end()?; + Ok(ret) + } + } + } +} + +/// macro to generate rust structs from a thrift struct definition +/// unlike enum and union, this macro will allow for visibility specifier +/// can also take optional lifetime for struct and elements within it (need e.g.) +#[macro_export] +macro_rules! thrift_struct { + ($(#[$($def_attrs:tt)*])* $vis:vis struct $identifier:ident $(< $lt:lifetime >)? { $($(#[$($field_attrs:tt)*])* $field_id:literal : $required_or_optional:ident $field_type:ident $(< $field_lt:lifetime >)? $(< $element_type:ident >)? $field_name:ident $(= $default_value:literal)? $(;)?)* }) => { + $(#[cfg_attr(not(doctest), $($def_attrs)*)])* + #[derive(Clone, Debug, Eq, PartialEq)] + #[allow(non_camel_case_types)] + #[allow(non_snake_case)] + #[allow(missing_docs)] + $vis struct $identifier $(<$lt>)? { + $($(#[cfg_attr(not(doctest), $($field_attrs)*)])* $vis $field_name: $crate::__thrift_required_or_optional!($required_or_optional $crate::__thrift_field_type!($field_type $($field_lt)? $($element_type)?))),* + } + + impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for $identifier $(<$lt>)? { + type Error = ParquetError; fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { - $(let mut $field_name: Option<$field_type> = None;)* + $(let mut $field_name: Option<$crate::__thrift_field_type!($field_type $($field_lt)? $($element_type)?)> = None;)* prot.read_struct_begin()?; loop { let field_ident = prot.read_field_begin()?; @@ -190,7 +210,7 @@ macro_rules! thrift_private_struct { } match field_ident.id { $($field_id => { - let val = $crate::__thrift_read_field!(prot $field_type); + let val = $crate::__thrift_read_field!(prot, $field_type $($field_lt)? $($element_type)?); $field_name = Some(val); })* _ => { @@ -198,14 +218,24 @@ macro_rules! 
thrift_private_struct { } }; } + prot.read_struct_end()?; + $($crate::__thrift_result_required_or_optional!($required_or_optional $field_name);)* Ok(Self { - $($field_name: $crate::__thrift_result_required_or_optional!($required_or_optional $field_name)),* + $($field_name),* }) } } } } +/// macro to use when decoding struct fields +#[macro_export] +macro_rules! thrift_read_field { + ($field_name:ident, $prot:tt, $field_type:ident) => { + $field_name = Some($crate::__thrift_read_field!($prot, $field_type)); + }; +} + #[doc(hidden)] #[macro_export] macro_rules! __thrift_required_or_optional { @@ -217,35 +247,50 @@ macro_rules! __thrift_required_or_optional { #[macro_export] macro_rules! __thrift_result_required_or_optional { (required $field_name:ident) => { - $field_name.expect(&format!( - "Required field {} not present", - stringify!($field_name) - )) - }; - (optional $field_name:ident) => { - $field_name + let $field_name = $field_name.expect(concat!( + "Required field ", + stringify!($field_name), + " is missing", + )); }; + (optional $field_name:ident) => {}; } #[doc(hidden)] #[macro_export] macro_rules! __thrift_read_field { - ($prot:tt bool) => { - $prot.read_bool()? + ($prot:tt, list $lt:lifetime binary) => { + Vec::<&'a [u8]>::try_from(&mut *$prot)? + }; + ($prot:tt, list $lt:lifetime $element_type:ident) => { + Vec::<$element_type>::try_from(&mut *$prot)? }; - ($prot:tt i8) => { - $prot.read_i8()? + ($prot:tt, list string) => { + Vec::::try_from(&mut *$prot)? + }; + ($prot:tt, list $element_type:ident) => { + Vec::<$element_type>::try_from(&mut *$prot)? + }; + ($prot:tt, string $lt:lifetime) => { + <&$lt str>::try_from(&mut *$prot)? + }; + ($prot:tt, binary $lt:lifetime) => { + <&$lt [u8]>::try_from(&mut *$prot)? + }; + ($prot:tt, $field_type:ident $lt:lifetime) => { + $field_type::try_from(&mut *$prot)? }; - ($prot:tt i32) => { - $prot.read_i32()? + ($prot:tt, string) => { + String::try_from(&mut *$prot)? }; - ($prot:tt i64) => { - $prot.read_i64()? + ($prot:tt, binary) => { + // this one needs to not conflict with `list` + $prot.read_bytes()?.to_vec() }; - ($prot:tt string) => { - $prot.read_string()? + ($prot:tt, double) => { + $crate::parquet_thrift::OrderedF64::try_from(&mut *$prot)? }; - ($prot:tt $field_type:ident) => { + ($prot:tt, $field_type:ident) => { $field_type::try_from(&mut *$prot)? }; } @@ -253,12 +298,36 @@ macro_rules! __thrift_read_field { #[doc(hidden)] #[macro_export] macro_rules! __thrift_field_type { + (binary $lt:lifetime) => { &$lt [u8] }; + (string $lt:lifetime) => { &$lt str }; + ($field_type:ident $lt:lifetime) => { $field_type<$lt> }; + (list $lt:lifetime $element_type:ident) => { Vec< $crate::__thrift_field_type!($element_type $lt) > }; + (list string) => { Vec }; (list $element_type:ident) => { Vec< $crate::__thrift_field_type!($element_type) > }; - (set $element_type:ident) => { Vec< $crate::__thrift_field_type!($element_type) > }; (binary) => { Vec }; (string) => { String }; + (double) => { $crate::parquet_thrift::OrderedF64 }; ($field_type:ty) => { $field_type }; - (Box $element_type:ident) => { std::boxed::Box< $crate::field_type!($element_type) > }; - (Rc $element_type:ident) => { std::rc::Rc< $crate::__thrift_field_type!($element_type) > }; - (Arc $element_type:ident) => { std::sync::Arc< $crate::__thrift_field_type!($element_type) > }; +} + +#[doc(hidden)] +#[macro_export] +macro_rules! 
__thrift_union_type {
+    ($field_type:ident) => { $field_type };
+    (list $field_type:ident) => { Vec<$field_type> };
+}
+
+#[doc(hidden)]
+#[macro_export]
+macro_rules! __thrift_read_variant {
+    ($prot:tt, $field_name:ident $field_type:ident) => {
+        Self::$field_name($field_type::try_from(&mut *$prot)?)
+    };
+    ($prot:tt, $field_name:ident list $field_type:ident) => {
+        Self::$field_name(Vec::<$field_type>::try_from(&mut *$prot)?)
+    };
+    ($prot:tt, $field_name:ident) => {{
+        $prot.skip_empty_struct()?;
+        Self::$field_name
+    }};
+}
diff --git a/parquet/src/parquet_thrift.rs b/parquet/src/parquet_thrift.rs
index 2514f3fc70ff..7f5fe475217f 100644
--- a/parquet/src/parquet_thrift.rs
+++ b/parquet/src/parquet_thrift.rs
@@ -20,8 +20,37 @@
 // to not allocate byte arrays or strings.
 #![allow(dead_code)]
 
+use std::cmp::Ordering;
+
 use crate::errors::{ParquetError, Result};
 
+// Couldn't implement thrift structs with f64 due to lack of Eq
+// for f64. This is a hacky workaround for now... there are other
+// wrappers out there that should probably be used instead.
+// The thrift crate seems to re-export an impl from ordered-float.
+#[derive(Debug, Clone, Copy, PartialEq)]
+pub struct OrderedF64(f64);
+
+impl From<OrderedF64> for f64 {
+    fn from(value: OrderedF64) -> Self {
+        value.0
+    }
+}
+
+impl Eq for OrderedF64 {} // Marker trait, requires PartialEq
+
+impl Ord for OrderedF64 {
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.0.total_cmp(&other.0)
+    }
+}
+
+impl PartialOrd for OrderedF64 {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
 // Thrift compact protocol types for struct fields.
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
 pub(crate) enum FieldType {
@@ -350,6 +379,11 @@ impl<'b, 'a: 'b> ThriftCompactInputProtocol<'a> {
         }
     }
 
+    fn skip_binary(&mut self) -> Result<()> {
+        let len = self.read_vlq()? as usize;
+        self.skip_bytes(len)
+    }
+
     /// Skip a field with type `field_type` recursively until the default
     /// maximum skip depth is reached.
     pub(crate) fn skip(&mut self, field_type: FieldType) -> Result<()> {
@@ -381,10 +415,7 @@ impl<'b, 'a: 'b> ThriftCompactInputProtocol<'a> {
             FieldType::I32 => self.skip_vlq().map(|_| ()),
             FieldType::I64 => self.skip_vlq().map(|_| ()),
             FieldType::Double => self.skip_bytes(8).map(|_| ()),
-            FieldType::Binary => {
-                let len = self.read_vlq()? as usize;
-                self.skip_bytes(len)
-            }
+            FieldType::Binary => self.skip_binary().map(|_| ()),
             FieldType::Struct => {
                 self.read_struct_begin()?;
                 loop {
@@ -413,3 +444,85 @@ impl<'b, 'a: 'b> ThriftCompactInputProtocol<'a> {
 fn eof_error() -> ParquetError {
     eof_err!("Unexpected EOF")
 }
+
+impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for bool {
+    type Error = ParquetError;
+    fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> {
+        prot.read_bool()
+    }
+}
+
+impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for i8 {
+    type Error = ParquetError;
+    fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> {
+        prot.read_i8()
+    }
+}
+
+impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for i16 {
+    type Error = ParquetError;
+    fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> {
+        prot.read_i16()
+    }
+}
+
+impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for i32 {
+    type Error = ParquetError;
+    fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> {
+        prot.read_i32()
+    }
+}
+
+impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for i64 {
+    type Error = ParquetError;
+    fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> {
+        prot.read_i64()
+    }
+}
+
+impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for OrderedF64 {
+    type Error = ParquetError;
+    fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> {
+        Ok(OrderedF64(prot.read_double()?))
+    }
+}
+
+impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for &'a str {
+    type Error = ParquetError;
+    fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> {
+        prot.read_string()
+    }
+}
+
+impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for String {
+    type Error = ParquetError;
+    fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> {
+        Ok(prot.read_string()?.to_owned())
+    }
+}
+
+impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for &'a [u8] {
+    type Error = ParquetError;
+    fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> {
+        prot.read_bytes()
+    }
+}
+
+impl<'a, T> TryFrom<&mut ThriftCompactInputProtocol<'a>> for Vec<T>
+where
+    T: for<'b> TryFrom<&'b mut ThriftCompactInputProtocol<'a>>,
+    ParquetError: for<'b> From<<T as TryFrom<&'b mut ThriftCompactInputProtocol<'a>>>::Error>,
+{
+    type Error = ParquetError;
+
+    fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> {
+        let list_ident = prot.read_list_begin()?;
+        let mut res = Vec::with_capacity(list_ident.size as usize);
+        for _ in 0..list_ident.size {
+            let val = T::try_from(prot)?;
+            res.push(val);
+        }
+
+        Ok(res)
+    }
+}
diff --git a/parquet/src/schema/types.rs b/parquet/src/schema/types.rs
index 8cda0e0d1841..1406295c3a4f 100644
--- a/parquet/src/schema/types.rs
+++ b/parquet/src/schema/types.rs
@@ -17,8 +17,10 @@
 
 //! Contains structs and methods to build Parquet schema and schema descriptors.
 
+use std::vec::IntoIter;
 use std::{collections::HashMap, fmt, sync::Arc};
 
+use crate::file::metadata::thrift_gen::SchemaElement;
 use crate::file::metadata::HeapSize;
 
 use crate::basic::{
@@ -1026,11 +1028,14 @@ impl HeapSize for SchemaDescriptor {
 impl SchemaDescriptor {
     /// Creates new schema descriptor from Parquet schema.
pub fn new(tp: TypePtr) -> Self { + const INIT_SCHEMA_DEPTH: usize = 16; assert!(tp.is_group(), "SchemaDescriptor should take a GroupType"); - let mut leaves = vec![]; - let mut leaf_to_base = Vec::new(); + let n_leaves = num_leaves(&tp); + let mut leaves = Vec::with_capacity(n_leaves); + let mut leaf_to_base = Vec::with_capacity(n_leaves); + let mut path = Vec::with_capacity(INIT_SCHEMA_DEPTH); for (root_idx, f) in tp.get_fields().iter().enumerate() { - let mut path = vec![]; + path.clear(); build_tree(f, root_idx, 0, 0, &mut leaves, &mut leaf_to_base, &mut path); } @@ -1109,6 +1114,26 @@ impl SchemaDescriptor { } } +// do a quick walk of the tree to get proper sizing for SchemaDescriptor arrays +fn num_leaves(tp: &TypePtr) -> usize { + let mut n_leaves = 0usize; + for f in tp.get_fields().iter() { + count_leaves(f, &mut n_leaves); + } + n_leaves +} + +fn count_leaves(tp: &TypePtr, n_leaves: &mut usize) { + match tp.as_ref() { + Type::PrimitiveType { .. } => *n_leaves += 1, + Type::GroupType { ref fields, .. } => { + for f in fields { + count_leaves(f, n_leaves); + } + } + } +} + fn build_tree<'a>( tp: &'a TypePtr, root_idx: usize, @@ -1287,7 +1312,7 @@ fn from_thrift_helper( .map(Repetition::try_from) .transpose()?; - let mut fields = vec![]; + let mut fields = Vec::with_capacity(n as usize); let mut next_index = index + 1; for _ in 0..n { let child_result = from_thrift_helper(elements, next_index)?; @@ -1402,6 +1427,151 @@ fn to_thrift_helper(schema: &Type, elements: &mut Vec`. + +// convert thrift decoded array of `SchemaElement` into this crate's representation of +// parquet types. this function consumes `elements`. +pub(crate) fn parquet_schema_from_array<'a>(elements: Vec>) -> Result { + let mut index = 0; + let num_elements = elements.len(); + let mut schema_nodes = Vec::with_capacity(1); // there should only be one element when done + + // turn into iterator so we can take ownership of elements of the vector + let mut elements = elements.into_iter(); + + while index < num_elements { + let t = schema_from_array_helper(&mut elements, num_elements, index)?; + index = t.0; + schema_nodes.push(t.1); + } + if schema_nodes.len() != 1 { + return Err(general_err!( + "Expected exactly one root node, but found {}", + schema_nodes.len() + )); + } + + if !schema_nodes[0].is_group() { + return Err(general_err!("Expected root node to be a group type")); + } + + Ok(schema_nodes.remove(0)) +} + +// recursive helper function for schema conversion +fn schema_from_array_helper<'a>( + elements: &mut IntoIter>, + num_elements: usize, + index: usize, +) -> Result<(usize, TypePtr)> { + // Whether or not the current node is root (message type). + // There is only one message type node in the schema tree. + let is_root_node = index == 0; + + if index >= num_elements { + return Err(general_err!( + "Index out of bound, index = {}, len = {}", + index, + num_elements + )); + } + let element = elements.next().expect("schema vector should not be empty"); + + // Check for empty schema + if let (true, None | Some(0)) = (is_root_node, element.num_children) { + let builder = Type::group_type_builder(element.name); + return Ok((index + 1, Arc::new(builder.build().unwrap()))); + } + + let converted_type = element.converted_type.unwrap_or(ConvertedType::NONE); + + // LogicalType is prefered to ConvertedType, but both may be present. 
+ let logical_type = element.logical_type; + + check_logical_type(&logical_type)?; + + let field_id = element.field_id; + match element.num_children { + // From parquet-format: + // The children count is used to construct the nested relationship. + // This field is not set when the element is a primitive type + // Sometimes parquet-cpp sets num_children field to 0 for primitive types, so we + // have to handle this case too. + None | Some(0) => { + // primitive type + if element.repetition_type.is_none() { + return Err(general_err!( + "Repetition level must be defined for a primitive type" + )); + } + let repetition = element.repetition_type.unwrap(); + if let Some(type_) = element.type_ { + let physical_type = type_; + let length = element.type_length.unwrap_or(-1); + let scale = element.scale.unwrap_or(-1); + let precision = element.precision.unwrap_or(-1); + let name = element.name; + let builder = Type::primitive_type_builder(name, physical_type) + .with_repetition(repetition) + .with_converted_type(converted_type) + .with_logical_type(logical_type) + .with_length(length) + .with_precision(precision) + .with_scale(scale) + .with_id(field_id); + Ok((index + 1, Arc::new(builder.build()?))) + } else { + let mut builder = Type::group_type_builder(element.name) + .with_converted_type(converted_type) + .with_logical_type(logical_type) + .with_id(field_id); + if !is_root_node { + // Sometimes parquet-cpp and parquet-mr set repetition level REQUIRED or + // REPEATED for root node. + // + // We only set repetition for group types that are not top-level message + // type. According to parquet-format: + // Root of the schema does not have a repetition_type. + // All other types must have one. + builder = builder.with_repetition(repetition); + } + Ok((index + 1, Arc::new(builder.build().unwrap()))) + } + } + Some(n) => { + let repetition = element.repetition_type; + + let mut fields = Vec::with_capacity(n as usize); + let mut next_index = index + 1; + for _ in 0..n { + let child_result = schema_from_array_helper(elements, num_elements, next_index)?; + next_index = child_result.0; + fields.push(child_result.1); + } + + let mut builder = Type::group_type_builder(element.name) + .with_converted_type(converted_type) + .with_logical_type(logical_type) + .with_fields(fields) + .with_id(field_id); + if let Some(rep) = repetition { + // Sometimes parquet-cpp and parquet-mr set repetition level REQUIRED or + // REPEATED for root node. + // + // We only set repetition for group types that are not top-level message + // type. According to parquet-format: + // Root of the schema does not have a repetition_type. + // All other types must have one. + if !is_root_node { + builder = builder.with_repetition(rep); + } + } + Ok((next_index, Arc::new(builder.build().unwrap()))) + } + } +} + #[cfg(test)] mod tests { use super::*; From 3c353e28bf0c4cd62a6aa814f034ca817f1a8cdb Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Wed, 20 Aug 2025 12:19:28 -0700 Subject: [PATCH 04/15] [thrift-remodel] Decoding of page indexes (#8160) # Which issue does this PR close? **Note: this targets a feature branch, not main** We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. - Part of #5854. # Rationale for this change Speed # What changes are included in this PR? Still a work in progress, but begins the process of converting page index parsing to the new thrift decoder. 
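As a rough sketch of the decoding pattern this PR moves to, here is the shape of `decode_offset_index` after the change (the types involved are crate-internal, and this is a simplified restatement of the code in this diff, not a new API):

```rust
use crate::errors::Result;
use crate::file::page_index::offset_index::OffsetIndexMetaData;
use crate::parquet_thrift::ThriftCompactInputProtocol;

// `data` holds the thrift-compact-encoded bytes of one OffsetIndex.
pub(crate) fn decode_offset_index(data: &[u8]) -> Result<OffsetIndexMetaData> {
    // The protocol is a cursor over the borrowed bytes; no intermediate
    // `format::OffsetIndex` struct is materialized along the way.
    let mut prot = ThriftCompactInputProtocol::new(data);
    // This `TryFrom` impl is generated by the `thrift_struct!` macro.
    OffsetIndexMetaData::try_from(&mut prot)
}
```
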
# Are these changes tested? This PR actually uses the new decoder when parsing the page indexes using the existing machinery. As such all tests involving the page indexes should apply to this code. # Are there any user-facing changes? Yes --- parquet/src/basic.rs | 53 +++++++++++++++ parquet/src/file/page_index/index.rs | 71 +++++++++++++++++++++ parquet/src/file/page_index/index_reader.rs | 47 +++++++++----- parquet/src/file/page_index/offset_index.rs | 44 +++++++------ parquet/src/parquet_macros.rs | 5 +- 5 files changed, 182 insertions(+), 38 deletions(-) diff --git a/parquet/src/basic.rs b/parquet/src/basic.rs index 78d294acd577..c325cf5dbf2b 100644 --- a/parquet/src/basic.rs +++ b/parquet/src/basic.rs @@ -501,15 +501,68 @@ thrift_enum!( /// /// [WriterVersion]: crate::file::properties::WriterVersion enum Encoding { + /// Default encoding. + /// - BOOLEAN - 1 bit per value. 0 is false; 1 is true. + /// - INT32 - 4 bytes per value. Stored as little-endian. + /// - INT64 - 8 bytes per value. Stored as little-endian. + /// - FLOAT - 4 bytes per value. IEEE. Stored as little-endian. + /// - DOUBLE - 8 bytes per value. IEEE. Stored as little-endian. + /// - BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes. + /// - FIXED_LEN_BYTE_ARRAY - Just the bytes. PLAIN = 0; // GROUP_VAR_INT = 1; + /// **Deprecated** dictionary encoding. + /// + /// The values in the dictionary are encoded using PLAIN encoding. + /// Since it is deprecated, RLE_DICTIONARY encoding is used for a data page, and + /// PLAIN encoding is used for dictionary page. PLAIN_DICTIONARY = 2; + /// Group packed run length encoding. + /// + /// Usable for definition/repetition levels encoding and boolean values. RLE = 3; + /// **Deprecated** Bit-packed encoding. + /// + /// This can only be used if the data has a known max width. + /// Usable for definition/repetition levels encoding. + /// + /// There are compatibility issues with files using this encoding. + /// The parquet standard specifies the bits to be packed starting from the + /// most-significant bit, several implementations do not follow this bit order. + /// Several other implementations also have issues reading this encoding + /// because of incorrect assumptions about the length of the encoded data. + /// + /// The RLE/bit-packing hybrid is more cpu and memory efficient and should be used instead. + #[deprecated( + since = "51.0.0", + note = "Please see documentation for compatibility issues and use the RLE/bit-packing hybrid encoding instead" + )] BIT_PACKED = 4; + /// Delta encoding for integers, either INT32 or INT64. + /// + /// Works best on sorted data. DELTA_BINARY_PACKED = 5; + /// Encoding for byte arrays to separate the length values and the data. + /// + /// The lengths are encoded using DELTA_BINARY_PACKED encoding. DELTA_LENGTH_BYTE_ARRAY = 6; + /// Incremental encoding for byte arrays. + /// + /// Prefix lengths are encoded using DELTA_BINARY_PACKED encoding. + /// Suffixes are stored using DELTA_LENGTH_BYTE_ARRAY encoding. DELTA_BYTE_ARRAY = 7; + /// Dictionary encoding. + /// + /// The ids are encoded using the RLE encoding. RLE_DICTIONARY = 8; + /// Encoding for fixed-width data. + /// + /// K byte-streams are created where K is the size in bytes of the data type. + /// The individual bytes of a value are scattered to the corresponding stream and + /// the streams are concatenated. + /// This itself does not reduce the size of the data but can lead to better compression + /// afterwards. 
Note that the use of this encoding with FIXED_LEN_BYTE_ARRAY(N) data may + /// perform poorly for large values of N. BYTE_STREAM_SPLIT = 9; } ); diff --git a/parquet/src/file/page_index/index.rs b/parquet/src/file/page_index/index.rs index 2c9aa009080e..ed586bcd33d0 100644 --- a/parquet/src/file/page_index/index.rs +++ b/parquet/src/file/page_index/index.rs @@ -24,6 +24,7 @@ use crate::data_type::private::ParquetValueType; use crate::data_type::{AsBytes, ByteArray, FixedLenByteArray, Int96}; use crate::errors::ParquetError; use crate::file::metadata::LevelHistogram; +use crate::file::page_index::index_reader::ColumnIndex; use std::fmt::Debug; /// Typed statistics for one data page @@ -193,6 +194,7 @@ impl NativeIndex { pub const PHYSICAL_TYPE: Type = T::PHYSICAL_TYPE; /// Creates a new [`NativeIndex`] + #[allow(dead_code)] pub(crate) fn try_new(index: crate::format::ColumnIndex) -> Result { let len = index.min_values.len(); @@ -306,6 +308,75 @@ impl NativeIndex { definition_level_histograms, ) } + + /// Creates a new [`NativeIndex`] + pub(crate) fn try_new_local(index: ColumnIndex) -> Result { + let len = index.min_values.len(); + + // turn Option> into Vec> + let null_counts = index + .null_counts + .map(|x| x.into_iter().map(Some).collect::>()) + .unwrap_or_else(|| vec![None; len]); + + // histograms are a 1D array encoding a 2D num_pages X num_levels matrix. + let to_page_histograms = |opt_hist: Option>| { + if let Some(hist) = opt_hist { + // TODO: should we assert (hist.len() % len) == 0? + let num_levels = hist.len() / len; + let mut res = Vec::with_capacity(len); + for i in 0..len { + let page_idx = i * num_levels; + let page_hist = hist[page_idx..page_idx + num_levels].to_vec(); + res.push(Some(LevelHistogram::from(page_hist))); + } + res + } else { + vec![None; len] + } + }; + + // turn Option> into Vec> + let rep_hists: Vec> = + to_page_histograms(index.repetition_level_histograms); + let def_hists: Vec> = + to_page_histograms(index.definition_level_histograms); + + // start assembling Vec + let mut indexes: Vec> = Vec::with_capacity(len); + let mut rep_iter = rep_hists.into_iter(); + let mut def_iter = def_hists.into_iter(); + + // this used to zip together the other iters, but that was quite a bit + // slower than this approach. + for (i, null_count) in null_counts.into_iter().enumerate().take(len) { + let is_null = index.null_pages[i]; + let min = if is_null { + None + } else { + Some(T::try_from_le_slice(index.min_values[i])?) + }; + let max = if is_null { + None + } else { + Some(T::try_from_le_slice(index.max_values[i])?) + }; + + indexes.push(PageIndex { + min, + max, + null_count, + repetition_level_histogram: rep_iter.next().unwrap_or(None), + definition_level_histogram: def_iter.next().unwrap_or(None), + }) + } + + let boundary_order = index.boundary_order; + Ok(Self { + indexes, + boundary_order, + }) + } } #[cfg(test)] diff --git a/parquet/src/file/page_index/index_reader.rs b/parquet/src/file/page_index/index_reader.rs index d4d405d68ff2..fbe6d3984596 100644 --- a/parquet/src/file/page_index/index_reader.rs +++ b/parquet/src/file/page_index/index_reader.rs @@ -17,14 +17,15 @@ //! Support for reading [`Index`] and [`OffsetIndexMetaData`] from parquet metadata. 
-use crate::basic::Type; +use crate::basic::{BoundaryOrder, Type}; use crate::data_type::Int96; -use crate::errors::ParquetError; +use crate::errors::{ParquetError, Result}; use crate::file::metadata::ColumnChunkMetaData; use crate::file::page_index::index::{Index, NativeIndex}; use crate::file::page_index::offset_index::OffsetIndexMetaData; use crate::file::reader::ChunkReader; -use crate::thrift::{TCompactSliceInputProtocol, TSerializable}; +use crate::parquet_thrift::{FieldType, ThriftCompactInputProtocol}; +use crate::thrift_struct; use std::ops::Range; /// Computes the covering range of two optional ranges @@ -129,25 +130,37 @@ pub fn read_offset_indexes( } pub(crate) fn decode_offset_index(data: &[u8]) -> Result { - let mut prot = TCompactSliceInputProtocol::new(data); - let offset = crate::format::OffsetIndex::read_from_in_protocol(&mut prot)?; - OffsetIndexMetaData::try_new(offset) + let mut prot = ThriftCompactInputProtocol::new(data); + OffsetIndexMetaData::try_from(&mut prot) } -pub(crate) fn decode_column_index(data: &[u8], column_type: Type) -> Result { - let mut prot = TCompactSliceInputProtocol::new(data); +thrift_struct!( +pub(crate) struct ColumnIndex<'a> { + 1: required list null_pages + 2: required list<'a> min_values + 3: required list<'a> max_values + 4: required BoundaryOrder boundary_order + 5: optional list null_counts + 6: optional list repetition_level_histograms; + 7: optional list definition_level_histograms; +} +); - let index = crate::format::ColumnIndex::read_from_in_protocol(&mut prot)?; +pub(crate) fn decode_column_index(data: &[u8], column_type: Type) -> Result { + let mut prot = ThriftCompactInputProtocol::new(data); + let index = ColumnIndex::try_from(&mut prot)?; let index = match column_type { - Type::BOOLEAN => Index::BOOLEAN(NativeIndex::::try_new(index)?), - Type::INT32 => Index::INT32(NativeIndex::::try_new(index)?), - Type::INT64 => Index::INT64(NativeIndex::::try_new(index)?), - Type::INT96 => Index::INT96(NativeIndex::::try_new(index)?), - Type::FLOAT => Index::FLOAT(NativeIndex::::try_new(index)?), - Type::DOUBLE => Index::DOUBLE(NativeIndex::::try_new(index)?), - Type::BYTE_ARRAY => Index::BYTE_ARRAY(NativeIndex::try_new(index)?), - Type::FIXED_LEN_BYTE_ARRAY => Index::FIXED_LEN_BYTE_ARRAY(NativeIndex::try_new(index)?), + Type::BOOLEAN => Index::BOOLEAN(NativeIndex::::try_new_local(index)?), + Type::INT32 => Index::INT32(NativeIndex::::try_new_local(index)?), + Type::INT64 => Index::INT64(NativeIndex::::try_new_local(index)?), + Type::INT96 => Index::INT96(NativeIndex::::try_new_local(index)?), + Type::FLOAT => Index::FLOAT(NativeIndex::::try_new_local(index)?), + Type::DOUBLE => Index::DOUBLE(NativeIndex::::try_new_local(index)?), + Type::BYTE_ARRAY => Index::BYTE_ARRAY(NativeIndex::try_new_local(index)?), + Type::FIXED_LEN_BYTE_ARRAY => { + Index::FIXED_LEN_BYTE_ARRAY(NativeIndex::try_new_local(index)?) + } }; Ok(index) diff --git a/parquet/src/file/page_index/offset_index.rs b/parquet/src/file/page_index/offset_index.rs index 5614b1750a0e..d4c196a3ae8b 100644 --- a/parquet/src/file/page_index/offset_index.rs +++ b/parquet/src/file/page_index/offset_index.rs @@ -19,21 +19,25 @@ //! //! 
[`OffsetIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
 
-use crate::errors::ParquetError;
+use crate::parquet_thrift::{FieldType, ThriftCompactInputProtocol};
+use crate::{
+    errors::{ParquetError, Result},
+    thrift_struct,
+};
 
+thrift_struct!(
 /// Page location information for [`OffsetIndexMetaData`]
-#[derive(Clone, Debug, Eq, PartialEq)]
 pub struct PageLocation {
-    /// Offset of the page in the file *
-    pub offset: i64,
-    /// Size of the page, including header. Sum of compressed_page_size and header
-    /// length
-    pub compressed_page_size: i32,
-    /// Index within the RowGroup of the first row of the page. When an
-    /// OffsetIndex is present, pages must begin on row boundaries
-    /// (repetition_level = 0).
-    pub first_row_index: i64,
+    /// Offset of the page in the file
+    1: required i64 offset
+    /// Size of the page, including header. Sum of compressed_page_size and header length
+    2: required i32 compressed_page_size
+    /// Index within the RowGroup of the first row of the page. When an
+    /// OffsetIndex is present, pages must begin on row boundaries
+    /// (repetition_level = 0).
+    3: required i64 first_row_index
 }
+);
 
 impl From<&crate::format::PageLocation> for PageLocation {
     fn from(value: &crate::format::PageLocation) -> Self {
@@ -55,24 +59,26 @@ impl From<&PageLocation> for crate::format::PageLocation {
     }
 }
 
+thrift_struct!(
 /// [`OffsetIndex`] information for a column chunk. Contains offsets and sizes for each page
 /// in the chunk. Optionally stores fully decoded page sizes for BYTE_ARRAY columns.
 ///
 /// [`OffsetIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
-#[derive(Debug, Clone, PartialEq)]
 pub struct OffsetIndexMetaData {
-    /// Vector of [`PageLocation`] objects, one per page in the chunk.
-    pub page_locations: Vec<PageLocation>,
-    /// Optional vector of unencoded page sizes, one per page in the chunk.
-    /// Only defined for BYTE_ARRAY columns.
-    pub unencoded_byte_array_data_bytes: Option<Vec<i64>>,
+    /// Vector of [`PageLocation`] objects, one per page in the chunk.
+    1: required list<PageLocation> page_locations
+    /// Optional vector of unencoded page sizes, one per page in the chunk.
+    /// Only defined for BYTE_ARRAY columns.
+    2: optional list<i64> unencoded_byte_array_data_bytes
 }
+);
 
 impl OffsetIndexMetaData {
     /// Creates a new [`OffsetIndexMetaData`] from an [`OffsetIndex`].
     ///
     /// [`OffsetIndex`]: crate::format::OffsetIndex
-    pub(crate) fn try_new(index: crate::format::OffsetIndex) -> Result<Self> {
+    #[allow(dead_code)]
+    pub(crate) fn try_new(index: crate::format::OffsetIndex) -> Result<Self> {
         let page_locations = index.page_locations.iter().map(|loc| loc.into()).collect();
         Ok(Self {
             page_locations,
@@ -91,8 +97,6 @@ impl OffsetIndexMetaData {
         self.unencoded_byte_array_data_bytes.as_ref()
     }
 
-    // TODO: remove annotation after merge
-    #[allow(dead_code)]
     pub(crate) fn to_thrift(&self) -> crate::format::OffsetIndex {
         let page_locations = self.page_locations.iter().map(|loc| loc.into()).collect();
         crate::format::OffsetIndex::new(
diff --git a/parquet/src/parquet_macros.rs b/parquet/src/parquet_macros.rs
index ebd86d861507..2d1ccd819b37 100644
--- a/parquet/src/parquet_macros.rs
+++ b/parquet/src/parquet_macros.rs
@@ -30,11 +30,12 @@ macro_rules! thrift_enum {
         #[allow(non_camel_case_types)]
         #[allow(missing_docs)]
         pub enum $identifier {
-            $($field_name = $field_value,)*
+            $($(#[cfg_attr(not(doctest), $($field_attrs)*)])* $field_name = $field_value,)*
         }
 
         impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for $identifier {
             type Error = ParquetError;
+            #[allow(deprecated)]
             fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> {
                 let val = prot.read_i32()?;
                 match val {
@@ -54,6 +55,7 @@ macro_rules! thrift_enum {
 
         impl TryFrom<crate::format::$identifier> for $identifier {
             type Error = ParquetError;
+            #[allow(deprecated)]
             fn try_from(value: crate::format::$identifier) -> Result<Self> {
                 Ok(match value {
                     $(crate::format::$identifier::$field_name => Self::$field_name,)*
@@ -63,6 +65,7 @@ macro_rules! thrift_enum {
 
         impl From<$identifier> for crate::format::$identifier {
+            #[allow(deprecated)]
             fn from(value: $identifier) -> Self {
                 match value {
                     $($identifier::$field_name => Self::$field_name,)*

From f777584af4dab38c00e94ea9a045c3557f8069a4 Mon Sep 17 00:00:00 2001
From: Ed Seidl
Date: Wed, 27 Aug 2025 12:43:29 -0700
Subject: [PATCH 05/15] [thrift-remodel] PoC new form for column index (#8191)

# Which issue does this PR close?

**Note: this targets a feature branch, not main**

- Part of #5854.

# Rationale for this change

Parsing the column index is _very_ slow. The largest part of the cost is taking the thrift structure (which is a struct of arrays) and converting it to an array of structs. This results in a large number of allocations when dealing with binary columns.

This is an experiment in creating a new structure to hold the column index info that is a little friendlier to parse. It may also be easier to consume on the DataFusion side.

# What changes are included in this PR?

A new `ColumnIndexMetaData` enum is added along with a type-parameterized `NativeColumnIndex` struct.

# Are these changes tested?

No, this is an experiment only. If this work can be honed into an acceptable `Index` replacement, then tests will be added at that time.

# Are there any user-facing changes?

Yes, this would be a radical change to the column indexes in `ParquetMetaData`.
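For illustration, consuming the new enum looks roughly like this (names as introduced in this PR's `column_index` module; the iterator accessors read straight out of the structure-of-arrays storage instead of materializing a `Vec<PageIndex>`):

```rust
use parquet::file::page_index::column_index::ColumnIndexMetaData;

// Print per-page minima for an INT32 column index, if one is present.
fn print_i32_mins(index: &ColumnIndexMetaData) {
    match index {
        ColumnIndexMetaData::NONE => println!("no column index"),
        ColumnIndexMetaData::INT32(idx) => {
            // `min_values_iter` yields `None` for all-null pages.
            for (page, min) in idx.min_values_iter().enumerate() {
                match min {
                    Some(v) => println!("page {page}: min {v}"),
                    None => println!("page {page}: null page"),
                }
            }
        }
        _ => println!("unhandled physical type"),
    }
}
```
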
--- parquet/src/arrow/arrow_reader/statistics.rs | 214 ++----- parquet/src/arrow/arrow_writer/mod.rs | 13 +- parquet/src/bin/parquet-index.rs | 89 ++- parquet/src/file/metadata/memory.rs | 50 ++ parquet/src/file/metadata/mod.rs | 24 +- parquet/src/file/metadata/reader.rs | 8 +- parquet/src/file/metadata/writer.rs | 35 +- parquet/src/file/page_index/column_index.rs | 569 +++++++++++++++++++ parquet/src/file/page_index/index.rs | 5 +- parquet/src/file/page_index/index_reader.rs | 40 +- parquet/src/file/page_index/mod.rs | 1 + parquet/src/file/serialized_reader.rs | 95 ++-- parquet/src/file/writer.rs | 28 +- parquet/tests/arrow_reader/io/mod.rs | 10 +- parquet/tests/encryption/encryption_util.rs | 10 +- 15 files changed, 908 insertions(+), 283 deletions(-) create mode 100644 parquet/src/file/page_index/column_index.rs diff --git a/parquet/src/arrow/arrow_reader/statistics.rs b/parquet/src/arrow/arrow_reader/statistics.rs index eba1f561203c..1613656ab9ae 100644 --- a/parquet/src/arrow/arrow_reader/statistics.rs +++ b/parquet/src/arrow/arrow_reader/statistics.rs @@ -25,7 +25,7 @@ use crate::basic::Type as PhysicalType; use crate::data_type::{ByteArray, FixedLenByteArray}; use crate::errors::{ParquetError, Result}; use crate::file::metadata::{ParquetColumnIndex, ParquetOffsetIndex, RowGroupMetaData}; -use crate::file::page_index::index::{Index, PageIndex}; +use crate::file::page_index::column_index::{ColumnIndexIterators, ColumnIndexMetaData}; use crate::file::statistics::Statistics as ParquetStatistics; use crate::schema::types::SchemaDescriptor; use arrow_array::builder::{ @@ -597,17 +597,17 @@ macro_rules! get_statistics { } macro_rules! make_data_page_stats_iterator { - ($iterator_type: ident, $func: expr, $index_type: path, $stat_value_type: ty) => { + ($iterator_type: ident, $func: ident, $stat_value_type: ty) => { struct $iterator_type<'a, I> where - I: Iterator, + I: Iterator, { iter: I, } impl<'a, I> $iterator_type<'a, I> where - I: Iterator, + I: Iterator, { fn new(iter: I) -> Self { Self { iter } @@ -616,7 +616,7 @@ macro_rules! make_data_page_stats_iterator { impl<'a, I> Iterator for $iterator_type<'a, I> where - I: Iterator, + I: Iterator, { type Item = Vec>; @@ -624,16 +624,14 @@ macro_rules! make_data_page_stats_iterator { let next = self.iter.next(); match next { Some((len, index)) => match index { - $index_type(native_index) => { - Some(native_index.indexes.iter().map($func).collect::>()) - } // No matching `Index` found; // thus no statistics that can be extracted. // We return vec![None; len] to effectively // create an arrow null-array with the length // corresponding to the number of entries in // `ParquetOffsetIndex` per row group per column. - _ => Some(vec![None; len]), + ColumnIndexMetaData::NONE => Some(vec![None; len]), + _ => Some(<$stat_value_type>::$func(&index).collect::>()), }, _ => None, } @@ -646,101 +644,45 @@ macro_rules! 
make_data_page_stats_iterator { }; } -make_data_page_stats_iterator!( - MinBooleanDataPageStatsIterator, - |x: &PageIndex| { x.min }, - Index::BOOLEAN, - bool -); -make_data_page_stats_iterator!( - MaxBooleanDataPageStatsIterator, - |x: &PageIndex| { x.max }, - Index::BOOLEAN, - bool -); -make_data_page_stats_iterator!( - MinInt32DataPageStatsIterator, - |x: &PageIndex| { x.min }, - Index::INT32, - i32 -); -make_data_page_stats_iterator!( - MaxInt32DataPageStatsIterator, - |x: &PageIndex| { x.max }, - Index::INT32, - i32 -); -make_data_page_stats_iterator!( - MinInt64DataPageStatsIterator, - |x: &PageIndex| { x.min }, - Index::INT64, - i64 -); -make_data_page_stats_iterator!( - MaxInt64DataPageStatsIterator, - |x: &PageIndex| { x.max }, - Index::INT64, - i64 -); +make_data_page_stats_iterator!(MinBooleanDataPageStatsIterator, min_values_iter, bool); +make_data_page_stats_iterator!(MaxBooleanDataPageStatsIterator, max_values_iter, bool); +make_data_page_stats_iterator!(MinInt32DataPageStatsIterator, min_values_iter, i32); +make_data_page_stats_iterator!(MaxInt32DataPageStatsIterator, max_values_iter, i32); +make_data_page_stats_iterator!(MinInt64DataPageStatsIterator, min_values_iter, i64); +make_data_page_stats_iterator!(MaxInt64DataPageStatsIterator, max_values_iter, i64); make_data_page_stats_iterator!( MinFloat16DataPageStatsIterator, - |x: &PageIndex| { x.min.clone() }, - Index::FIXED_LEN_BYTE_ARRAY, + min_values_iter, FixedLenByteArray ); make_data_page_stats_iterator!( MaxFloat16DataPageStatsIterator, - |x: &PageIndex| { x.max.clone() }, - Index::FIXED_LEN_BYTE_ARRAY, + max_values_iter, FixedLenByteArray ); -make_data_page_stats_iterator!( - MinFloat32DataPageStatsIterator, - |x: &PageIndex| { x.min }, - Index::FLOAT, - f32 -); -make_data_page_stats_iterator!( - MaxFloat32DataPageStatsIterator, - |x: &PageIndex| { x.max }, - Index::FLOAT, - f32 -); -make_data_page_stats_iterator!( - MinFloat64DataPageStatsIterator, - |x: &PageIndex| { x.min }, - Index::DOUBLE, - f64 -); -make_data_page_stats_iterator!( - MaxFloat64DataPageStatsIterator, - |x: &PageIndex| { x.max }, - Index::DOUBLE, - f64 -); +make_data_page_stats_iterator!(MinFloat32DataPageStatsIterator, min_values_iter, f32); +make_data_page_stats_iterator!(MaxFloat32DataPageStatsIterator, max_values_iter, f32); +make_data_page_stats_iterator!(MinFloat64DataPageStatsIterator, min_values_iter, f64); +make_data_page_stats_iterator!(MaxFloat64DataPageStatsIterator, max_values_iter, f64); make_data_page_stats_iterator!( MinByteArrayDataPageStatsIterator, - |x: &PageIndex| { x.min.clone() }, - Index::BYTE_ARRAY, + min_values_iter, ByteArray ); make_data_page_stats_iterator!( MaxByteArrayDataPageStatsIterator, - |x: &PageIndex| { x.max.clone() }, - Index::BYTE_ARRAY, + max_values_iter, ByteArray ); make_data_page_stats_iterator!( MaxFixedLenByteArrayDataPageStatsIterator, - |x: &PageIndex| { x.max.clone() }, - Index::FIXED_LEN_BYTE_ARRAY, + max_values_iter, FixedLenByteArray ); make_data_page_stats_iterator!( MinFixedLenByteArrayDataPageStatsIterator, - |x: &PageIndex| { x.min.clone() }, - Index::FIXED_LEN_BYTE_ARRAY, + min_values_iter, FixedLenByteArray ); @@ -748,14 +690,14 @@ macro_rules! get_decimal_page_stats_iterator { ($iterator_type: ident, $func: ident, $stat_value_type: ident, $convert_func: ident) => { struct $iterator_type<'a, I> where - I: Iterator, + I: Iterator, { iter: I, } impl<'a, I> $iterator_type<'a, I> where - I: Iterator, + I: Iterator, { fn new(iter: I) -> Self { Self { iter } @@ -764,44 +706,37 @@ macro_rules! 
get_decimal_page_stats_iterator { impl<'a, I> Iterator for $iterator_type<'a, I> where - I: Iterator, + I: Iterator, { type Item = Vec>; + // Some(native_index.$func().map(|v| v.map($conv)).collect::>()) fn next(&mut self) -> Option { let next = self.iter.next(); match next { Some((len, index)) => match index { - Index::INT32(native_index) => Some( + ColumnIndexMetaData::INT32(native_index) => Some( native_index - .indexes - .iter() - .map(|x| x.$func.and_then(|x| Some($stat_value_type::from(x)))) + .$func() + .map(|x| x.map(|x| $stat_value_type::from(*x))) .collect::>(), ), - Index::INT64(native_index) => Some( + ColumnIndexMetaData::INT64(native_index) => Some( native_index - .indexes - .iter() - .map(|x| x.$func.and_then(|x| $stat_value_type::try_from(x).ok())) + .$func() + .map(|x| x.map(|x| $stat_value_type::try_from(*x).unwrap())) .collect::>(), ), - Index::BYTE_ARRAY(native_index) => Some( + ColumnIndexMetaData::BYTE_ARRAY(native_index) => Some( native_index - .indexes - .iter() - .map(|x| { - x.clone().$func.and_then(|x| Some($convert_func(x.data()))) - }) + .$func() + .map(|x| x.map(|x| $convert_func(x))) .collect::>(), ), - Index::FIXED_LEN_BYTE_ARRAY(native_index) => Some( + ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(native_index) => Some( native_index - .indexes - .iter() - .map(|x| { - x.clone().$func.and_then(|x| Some($convert_func(x.data()))) - }) + .$func() + .map(|x| x.map(|x| $convert_func(x))) .collect::>(), ), _ => Some(vec![None; len]), @@ -819,56 +754,56 @@ macro_rules! get_decimal_page_stats_iterator { get_decimal_page_stats_iterator!( MinDecimal32DataPageStatsIterator, - min, + min_values_iter, i32, from_bytes_to_i32 ); get_decimal_page_stats_iterator!( MaxDecimal32DataPageStatsIterator, - max, + max_values_iter, i32, from_bytes_to_i32 ); get_decimal_page_stats_iterator!( MinDecimal64DataPageStatsIterator, - min, + min_values_iter, i64, from_bytes_to_i64 ); get_decimal_page_stats_iterator!( MaxDecimal64DataPageStatsIterator, - max, + max_values_iter, i64, from_bytes_to_i64 ); get_decimal_page_stats_iterator!( MinDecimal128DataPageStatsIterator, - min, + min_values_iter, i128, from_bytes_to_i128 ); get_decimal_page_stats_iterator!( MaxDecimal128DataPageStatsIterator, - max, + max_values_iter, i128, from_bytes_to_i128 ); get_decimal_page_stats_iterator!( MinDecimal256DataPageStatsIterator, - min, + min_values_iter, i256, from_bytes_to_i256 ); get_decimal_page_stats_iterator!( MaxDecimal256DataPageStatsIterator, - max, + max_values_iter, i256, from_bytes_to_i256 ); @@ -1174,77 +1109,44 @@ fn max_statistics<'a, I: Iterator>>( } /// Extracts the min statistics from an iterator -/// of parquet page [`Index`]'es to an [`ArrayRef`] +/// of parquet page [`ColumnIndexMetaData`]'s to an [`ArrayRef`] pub(crate) fn min_page_statistics<'a, I>( data_type: &DataType, iterator: I, physical_type: Option, ) -> Result where - I: Iterator, + I: Iterator, { get_data_page_statistics!(Min, data_type, iterator, physical_type) } /// Extracts the max statistics from an iterator -/// of parquet page [`Index`]'es to an [`ArrayRef`] +/// of parquet page [`ColumnIndexMetaData`]'s to an [`ArrayRef`] pub(crate) fn max_page_statistics<'a, I>( data_type: &DataType, iterator: I, physical_type: Option, ) -> Result where - I: Iterator, + I: Iterator, { get_data_page_statistics!(Max, data_type, iterator, physical_type) } /// Extracts the null count statistics from an iterator -/// of parquet page [`Index`]'es to an [`ArrayRef`] +/// of parquet page [`ColumnIndexMetaData`]'s to an [`ArrayRef`] /// /// The 
returned Array is an [`UInt64Array`] pub(crate) fn null_counts_page_statistics<'a, I>(iterator: I) -> Result where - I: Iterator, + I: Iterator, { let iter = iterator.flat_map(|(len, index)| match index { - Index::NONE => vec![None; len], - Index::BOOLEAN(native_index) => native_index - .indexes - .iter() - .map(|x| x.null_count.map(|x| x as u64)) - .collect::>(), - Index::INT32(native_index) => native_index - .indexes - .iter() - .map(|x| x.null_count.map(|x| x as u64)) - .collect::>(), - Index::INT64(native_index) => native_index - .indexes - .iter() - .map(|x| x.null_count.map(|x| x as u64)) - .collect::>(), - Index::FLOAT(native_index) => native_index - .indexes - .iter() - .map(|x| x.null_count.map(|x| x as u64)) - .collect::>(), - Index::DOUBLE(native_index) => native_index - .indexes - .iter() - .map(|x| x.null_count.map(|x| x as u64)) - .collect::>(), - Index::FIXED_LEN_BYTE_ARRAY(native_index) => native_index - .indexes - .iter() - .map(|x| x.null_count.map(|x| x as u64)) - .collect::>(), - Index::BYTE_ARRAY(native_index) => native_index - .indexes - .iter() - .map(|x| x.null_count.map(|x| x as u64)) - .collect::>(), - _ => unimplemented!(), + ColumnIndexMetaData::NONE => vec![None; len], + column_index => column_index.null_counts().map_or(vec![None; len], |v| { + v.iter().map(|i| Some(*i as u64)).collect::>() + }), }); Ok(UInt64Array::from_iter(iter)) @@ -1573,7 +1475,7 @@ impl<'a> StatisticsConverter<'a> { /// page level statistics can prune at a finer granularity. /// /// However since they are stored in a separate metadata - /// structure ([`Index`]) there is different code to extract them as + /// structure ([`ColumnIndexMetaData`]) there is different code to extract them as /// compared to arrow statistics. /// /// # Parameters: diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs index c6b0b426f9dd..bd9f30c36103 100644 --- a/parquet/src/arrow/arrow_writer/mod.rs +++ b/parquet/src/arrow/arrow_writer/mod.rs @@ -1488,6 +1488,7 @@ mod tests { use crate::arrow::ARROW_SCHEMA_META_KEY; use crate::column::page::{Page, PageReader}; use crate::file::page_encoding_stats::PageEncodingStats; + use crate::file::page_index::column_index::ColumnIndexMetaData; use crate::file::reader::SerializedPageReader; use crate::format::PageHeader; use crate::schema::types::ColumnPath; @@ -1507,7 +1508,6 @@ mod tests { use crate::basic::Encoding; use crate::data_type::AsBytes; use crate::file::metadata::{ColumnChunkMetaData, ParquetMetaData, ParquetMetaDataReader}; - use crate::file::page_index::index::Index; use crate::file::properties::{ BloomFilterPosition, EnabledStatistics, ReaderProperties, WriterVersion, }; @@ -4002,9 +4002,12 @@ mod tests { assert_eq!(column_index[0].len(), 2); // 2 columns let a_idx = &column_index[0][0]; - assert!(matches!(a_idx, Index::BYTE_ARRAY(_)), "{a_idx:?}"); + assert!( + matches!(a_idx, ColumnIndexMetaData::BYTE_ARRAY(_)), + "{a_idx:?}" + ); let b_idx = &column_index[0][1]; - assert!(matches!(b_idx, Index::NONE), "{b_idx:?}"); + assert!(matches!(b_idx, ColumnIndexMetaData::NONE), "{b_idx:?}"); } #[test] @@ -4070,9 +4073,9 @@ mod tests { assert_eq!(column_index[0].len(), 2); // 2 columns let a_idx = &column_index[0][0]; - assert!(matches!(a_idx, Index::NONE), "{a_idx:?}"); + assert!(matches!(a_idx, ColumnIndexMetaData::NONE), "{a_idx:?}"); let b_idx = &column_index[0][1]; - assert!(matches!(b_idx, Index::NONE), "{b_idx:?}"); + assert!(matches!(b_idx, ColumnIndexMetaData::NONE), "{b_idx:?}"); } #[test] diff --git 
a/parquet/src/bin/parquet-index.rs b/parquet/src/bin/parquet-index.rs index e91f5e5a9f17..397a75c76ae4 100644 --- a/parquet/src/bin/parquet-index.rs +++ b/parquet/src/bin/parquet-index.rs @@ -35,8 +35,11 @@ //! [page index]: https://github.com/apache/parquet-format/blob/master/PageIndex.md use clap::Parser; +use parquet::data_type::ByteArray; use parquet::errors::{ParquetError, Result}; -use parquet::file::page_index::index::{Index, PageIndex}; +use parquet::file::page_index::column_index::{ + ByteArrayColumnIndex, ColumnIndexMetaData, PrimitiveColumnIndex, +}; use parquet::file::page_index::offset_index::{OffsetIndexMetaData, PageLocation}; use parquet::file::reader::{FileReader, SerializedFileReader}; use parquet::file::serialized_reader::ReadOptionsBuilder; @@ -96,16 +99,20 @@ impl Args { let row_counts = compute_row_counts(offset_index.page_locations.as_slice(), row_group.num_rows()); match &column_indices[column_idx] { - Index::NONE => println!("NO INDEX"), - Index::BOOLEAN(v) => print_index(&v.indexes, offset_index, &row_counts)?, - Index::INT32(v) => print_index(&v.indexes, offset_index, &row_counts)?, - Index::INT64(v) => print_index(&v.indexes, offset_index, &row_counts)?, - Index::INT96(v) => print_index(&v.indexes, offset_index, &row_counts)?, - Index::FLOAT(v) => print_index(&v.indexes, offset_index, &row_counts)?, - Index::DOUBLE(v) => print_index(&v.indexes, offset_index, &row_counts)?, - Index::BYTE_ARRAY(v) => print_index(&v.indexes, offset_index, &row_counts)?, - Index::FIXED_LEN_BYTE_ARRAY(v) => { - print_index(&v.indexes, offset_index, &row_counts)? + ColumnIndexMetaData::NONE => println!("NO INDEX"), + ColumnIndexMetaData::BOOLEAN(v) => { + print_index::(v, offset_index, &row_counts)? + } + ColumnIndexMetaData::INT32(v) => print_index(v, offset_index, &row_counts)?, + ColumnIndexMetaData::INT64(v) => print_index(v, offset_index, &row_counts)?, + ColumnIndexMetaData::INT96(v) => print_index(v, offset_index, &row_counts)?, + ColumnIndexMetaData::FLOAT(v) => print_index(v, offset_index, &row_counts)?, + ColumnIndexMetaData::DOUBLE(v) => print_index(v, offset_index, &row_counts)?, + ColumnIndexMetaData::BYTE_ARRAY(v) => { + print_bytes_index(v, offset_index, &row_counts)? + } + ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(v) => { + print_bytes_index(v, offset_index, &row_counts)? 
} } } @@ -131,20 +138,21 @@ fn compute_row_counts(offset_index: &[PageLocation], rows: i64) -> Vec { /// Prints index information for a single column chunk fn print_index( - column_index: &[PageIndex], + column_index: &PrimitiveColumnIndex, offset_index: &OffsetIndexMetaData, row_counts: &[i64], ) -> Result<()> { - if column_index.len() != offset_index.page_locations.len() { + if column_index.num_pages() as usize != offset_index.page_locations.len() { return Err(ParquetError::General(format!( "Index length mismatch, got {} and {}", - column_index.len(), + column_index.num_pages(), offset_index.page_locations.len() ))); } - for (idx, ((c, o), row_count)) in column_index - .iter() + for (idx, (((min, max), o), row_count)) in column_index + .min_values_iter() + .zip(column_index.max_values_iter()) .zip(offset_index.page_locations()) .zip(row_counts) .enumerate() @@ -153,12 +161,12 @@ fn print_index( "Page {:>5} at offset {:#010x} with length {:>10} and row count {:>10}", idx, o.offset, o.compressed_page_size, row_count ); - match &c.min { + match min { Some(m) => print!(", min {m:>10}"), None => print!(", min {:>10}", "NONE"), } - match &c.max { + match max { Some(m) => print!(", max {m:>10}"), None => print!(", max {:>10}", "NONE"), } @@ -168,6 +176,51 @@ fn print_index( Ok(()) } +fn print_bytes_index( + column_index: &ByteArrayColumnIndex, + offset_index: &OffsetIndexMetaData, + row_counts: &[i64], +) -> Result<()> { + if column_index.num_pages() as usize != offset_index.page_locations.len() { + return Err(ParquetError::General(format!( + "Index length mismatch, got {} and {}", + column_index.num_pages(), + offset_index.page_locations.len() + ))); + } + + for (idx, (((min, max), o), row_count)) in column_index + .min_values_iter() + .zip(column_index.max_values_iter()) + .zip(offset_index.page_locations()) + .zip(row_counts) + .enumerate() + { + print!( + "Page {:>5} at offset {:#010x} with length {:>10} and row count {:>10}", + idx, o.offset, o.compressed_page_size, row_count + ); + match min { + Some(m) => match String::from_utf8(m.to_vec()) { + Ok(s) => print!(", min {s:>10}"), + Err(_) => print!(", min {:>10}", ByteArray::from(m)), + }, + None => print!(", min {:>10}", "NONE"), + } + + match max { + Some(m) => match String::from_utf8(m.to_vec()) { + Ok(s) => print!(", max {s:>10}"), + Err(_) => print!(", min {:>10}", ByteArray::from(m)), + }, + None => print!(", max {:>10}", "NONE"), + } + println!() + } + + Ok(()) +} + fn main() -> Result<()> { Args::parse().run() } diff --git a/parquet/src/file/metadata/memory.rs b/parquet/src/file/metadata/memory.rs index 0b8d3b336fc0..69eee3c2999d 100644 --- a/parquet/src/file/metadata/memory.rs +++ b/parquet/src/file/metadata/memory.rs @@ -24,6 +24,9 @@ use crate::file::metadata::{ ColumnChunkMetaData, FileMetaData, KeyValue, RowGroupMetaData, SortingColumn, }; use crate::file::page_encoding_stats::PageEncodingStats; +use crate::file::page_index::column_index::{ + ByteArrayColumnIndex, ColumnIndex, ColumnIndexMetaData, PrimitiveColumnIndex, +}; use crate::file::page_index::index::{Index, NativeIndex, PageIndex}; use crate::file::page_index::offset_index::{OffsetIndexMetaData, PageLocation}; use crate::file::statistics::{Statistics, ValueStatistics}; @@ -154,6 +157,48 @@ impl HeapSize for OffsetIndexMetaData { } } +impl HeapSize for ColumnIndexMetaData { + fn heap_size(&self) -> usize { + match self { + Self::NONE => 0, + Self::BOOLEAN(native_index) => native_index.heap_size(), + Self::INT32(native_index) => native_index.heap_size(), + 
Self::INT64(native_index) => native_index.heap_size(), + Self::INT96(native_index) => native_index.heap_size(), + Self::FLOAT(native_index) => native_index.heap_size(), + Self::DOUBLE(native_index) => native_index.heap_size(), + Self::BYTE_ARRAY(native_index) => native_index.heap_size(), + Self::FIXED_LEN_BYTE_ARRAY(native_index) => native_index.heap_size(), + } + } +} + +impl HeapSize for ColumnIndex { + fn heap_size(&self) -> usize { + self.null_pages.heap_size() + + self.boundary_order.heap_size() + + self.null_counts.heap_size() + + self.definition_level_histograms.heap_size() + + self.repetition_level_histograms.heap_size() + } +} + +impl<T: HeapSize> HeapSize for PrimitiveColumnIndex<T> { + fn heap_size(&self) -> usize { + self.column_index.heap_size() + self.min_values.heap_size() + self.max_values.heap_size() + } +} + +impl HeapSize for ByteArrayColumnIndex { + fn heap_size(&self) -> usize { + self.column_index.heap_size() + + self.min_bytes.heap_size() + + self.min_offsets.heap_size() + + self.max_bytes.heap_size() + + self.max_offsets.heap_size() + } +} + impl HeapSize for Index { fn heap_size(&self) -> usize { match self { @@ -193,6 +238,11 @@ impl HeapSize for bool { 0 // no heap allocations } } +impl HeapSize for u8 { + fn heap_size(&self) -> usize { + 0 // no heap allocations + } +} impl HeapSize for i32 { fn heap_size(&self) -> usize { 0 // no heap allocations diff --git a/parquet/src/file/metadata/mod.rs b/parquet/src/file/metadata/mod.rs index f2fe9de77e72..69cdf8f10714 100644 --- a/parquet/src/file/metadata/mod.rs +++ b/parquet/src/file/metadata/mod.rs @@ -106,7 +106,7 @@ use crate::file::column_crypto_metadata::{self, ColumnCryptoMetaData}; pub(crate) use crate::file::metadata::memory::HeapSize; use crate::file::{ page_encoding_stats::{self, PageEncodingStats}, - page_index::offset_index::PageLocation, + page_index::{column_index::ColumnIndexMetaData, offset_index::PageLocation}, }; use crate::file::{ page_index::index::PageIndex, @@ -156,7 +156,7 @@ pub(crate) use writer::ThriftMetadataWriter; /// /// [PageIndex documentation]: https://github.com/apache/parquet-format/blob/master/PageIndex.md /// [`ColumnIndex`]: crate::format::ColumnIndex -pub type ParquetColumnIndex = Vec<Vec<Index>>; +pub type ParquetColumnIndex = Vec<Vec<ColumnIndexMetaData>>; /// [`OffsetIndexMetaData`] for each data page of each row group of each column /// @@ -1948,7 +1948,7 @@ impl OffsetIndexBuilder { mod tests { use super::*; use crate::basic::{PageType, SortOrder}; - use crate::file::page_index::index::NativeIndex; + use crate::file::page_index::column_index::{ColumnIndex, PrimitiveColumnIndex}; #[test] fn test_row_group_metadata_thrift_conversion() { @@ -2223,7 +2223,17 @@ mod tests { let mut column_index = ColumnIndexBuilder::new(Type::BOOLEAN); column_index.append(false, vec![1u8], vec![2u8, 3u8], 4); let column_index = column_index.build_to_thrift(); - let native_index = NativeIndex::<bool>::try_new(column_index).unwrap(); + let native_index = PrimitiveColumnIndex::<bool> { + column_index: ColumnIndex { + null_pages: column_index.null_pages, + boundary_order: column_index.boundary_order.try_into().unwrap(), + null_counts: column_index.null_counts, + repetition_level_histograms: column_index.repetition_level_histograms, + definition_level_histograms: column_index.definition_level_histograms, + }, + min_values: vec![], + max_values: vec![], + }; // Now, add in OffsetIndex let mut offset_index = OffsetIndexBuilder::new(); @@ -2237,16 +2247,16 @@ mod tests { let parquet_meta = ParquetMetaDataBuilder::new(file_metadata) .set_row_groups(row_group_meta) -
.set_column_index(Some(vec![vec![Index::BOOLEAN(native_index)]])) + .set_column_index(Some(vec![vec![ColumnIndexMetaData::BOOLEAN(native_index)]])) .set_offset_index(Some(vec![vec![ OffsetIndexMetaData::try_new(offset_index).unwrap() ]])) .build(); #[cfg(not(feature = "encryption"))] - let bigger_expected_size = 2784; + let bigger_expected_size = 2704; #[cfg(feature = "encryption")] - let bigger_expected_size = 3120; + let bigger_expected_size = 3040; // more set fields means more memory usage assert!(bigger_expected_size > base_expected_size); diff --git a/parquet/src/file/metadata/reader.rs b/parquet/src/file/metadata/reader.rs index a403f4eee8f0..57cc7c57ac66 100644 --- a/parquet/src/file/metadata/reader.rs +++ b/parquet/src/file/metadata/reader.rs @@ -34,7 +34,7 @@ use bytes::Bytes; use crate::errors::{ParquetError, Result}; use crate::file::metadata::{ColumnChunkMetaData, ParquetMetaData, RowGroupMetaData}; -use crate::file::page_index::index::Index; +use crate::file::page_index::column_index::ColumnIndexMetaData; use crate::file::page_index::index_reader::{acc_range, decode_column_index, decode_offset_index}; use crate::file::reader::ChunkReader; use crate::file::{FOOTER_SIZE, PARQUET_MAGIC, PARQUET_MAGIC_ENCR_FOOTER}; @@ -566,7 +566,7 @@ impl ParquetMetaDataReader { col_idx, ) } - None => Ok(Index::NONE), + None => Ok(ColumnIndexMetaData::NONE), }) .collect::<Result<Vec<_>>>() }) @@ -584,7 +584,7 @@ impl ParquetMetaDataReader { column: &ColumnChunkMetaData, row_group_index: usize, col_index: usize, - ) -> Result<Index> { + ) -> Result<ColumnIndexMetaData> { match &column.column_crypto_metadata { Some(crypto_metadata) => { let file_decryptor = metadata.file_decryptor.as_ref().ok_or_else(|| { @@ -612,7 +612,7 @@ impl ParquetMetaDataReader { column: &ColumnChunkMetaData, _row_group_index: usize, _col_index: usize, - ) -> Result<Index> { + ) -> Result<ColumnIndexMetaData> { decode_column_index(bytes, column.column_type()) } diff --git a/parquet/src/file/metadata/writer.rs b/parquet/src/file/metadata/writer.rs index acae20ec3cef..404bcf5dba8a 100644 --- a/parquet/src/file/metadata/writer.rs +++ b/parquet/src/file/metadata/writer.rs @@ -24,9 +24,7 @@ use crate::encryption::{ }; #[cfg(feature = "encryption")] use crate::errors::ParquetError; -use crate::errors::Result; use crate::file::metadata::{KeyValue, ParquetMetaData}; -use crate::file::page_index::index::Index; use crate::file::writer::{get_file_magic, TrackedWrite}; use crate::format::EncryptionAlgorithm; #[cfg(feature = "encryption")] @@ -34,6 +32,7 @@ use crate::format::{AesGcmV1, ColumnCryptoMetaData}; use crate::schema::types; use crate::schema::types::{SchemaDescPtr, SchemaDescriptor, TypePtr}; use crate::thrift::TSerializable; +use crate::{errors::Result, file::page_index::column_index::ColumnIndexMetaData}; use std::io::Write; use std::sync::Arc; use thrift::protocol::TCompactOutputProtocol; @@ -391,17 +390,31 @@ impl<'a, W: Write> ParquetMetaDataWriter<'a, W> { column_indexes .iter() .map(|column_index| match column_index { - Index::NONE => None, - Index::BOOLEAN(column_index) => Some(column_index.to_thrift()), - Index::BYTE_ARRAY(column_index) => Some(column_index.to_thrift()), - Index::DOUBLE(column_index) => Some(column_index.to_thrift()), - Index::FIXED_LEN_BYTE_ARRAY(column_index) => { + ColumnIndexMetaData::NONE => None, + ColumnIndexMetaData::BOOLEAN(column_index) => { + Some(column_index.to_thrift()) + } + ColumnIndexMetaData::BYTE_ARRAY(column_index) => { + Some(column_index.to_thrift()) + } + ColumnIndexMetaData::DOUBLE(column_index) => { + Some(column_index.to_thrift()) + } +
ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(column_index) => { + Some(column_index.to_thrift()) + } + ColumnIndexMetaData::FLOAT(column_index) => { + Some(column_index.to_thrift()) + } + ColumnIndexMetaData::INT32(column_index) => { + Some(column_index.to_thrift()) + } + ColumnIndexMetaData::INT64(column_index) => { + Some(column_index.to_thrift()) + } + ColumnIndexMetaData::INT96(column_index) => { Some(column_index.to_thrift()) } - Index::FLOAT(column_index) => Some(column_index.to_thrift()), - Index::INT32(column_index) => Some(column_index.to_thrift()), - Index::INT64(column_index) => Some(column_index.to_thrift()), - Index::INT96(column_index) => Some(column_index.to_thrift()), }) .collect() }) diff --git a/parquet/src/file/page_index/column_index.rs b/parquet/src/file/page_index/column_index.rs new file mode 100644 index 000000000000..2d43c93b2e4b --- /dev/null +++ b/parquet/src/file/page_index/column_index.rs @@ -0,0 +1,569 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! [`ColumnIndexMetaData`] structures holding decoded [`ColumnIndex`] information +//! +//! [`ColumnIndex`]: crate::format::ColumnIndex +//! 
+ +use crate::{ + data_type::{ByteArray, FixedLenByteArray}, + errors::Result, +}; +use std::ops::Deref; + +use crate::{ + basic::BoundaryOrder, + data_type::{private::ParquetValueType, Int96}, + file::page_index::index_reader::ThriftColumnIndex, +}; + +/// Common bits of the column index +#[derive(Debug, Clone, PartialEq)] +pub struct ColumnIndex { + pub(crate) null_pages: Vec<bool>, + pub(crate) boundary_order: BoundaryOrder, + pub(crate) null_counts: Option<Vec<i64>>, + pub(crate) repetition_level_histograms: Option<Vec<i64>>, + pub(crate) definition_level_histograms: Option<Vec<i64>>, +} + +impl ColumnIndex { + /// Returns the number of pages + pub fn num_pages(&self) -> u64 { + self.null_pages.len() as u64 + } + + /// Returns the number of null values in the page indexed by `idx` + /// + /// Returns `None` if no null counts have been set in the index + pub fn null_count(&self, idx: usize) -> Option<i64> { + self.null_counts.as_ref().map(|nc| nc[idx]) + } + + /// Returns the repetition level histogram for the page indexed by `idx` + pub fn repetition_level_histogram(&self, idx: usize) -> Option<&[i64]> { + if let Some(rep_hists) = self.repetition_level_histograms.as_ref() { + let num_lvls = rep_hists.len() / self.num_pages() as usize; + let start = num_lvls * idx; + Some(&rep_hists[start..start + num_lvls]) + } else { + None + } + } + + /// Returns the definition level histogram for the page indexed by `idx` + pub fn definition_level_histogram(&self, idx: usize) -> Option<&[i64]> { + if let Some(def_hists) = self.definition_level_histograms.as_ref() { + let num_lvls = def_hists.len() / self.num_pages() as usize; + let start = num_lvls * idx; + Some(&def_hists[start..start + num_lvls]) + } else { + None + } + } + + /// Returns whether the page indexed by `idx` consists of all null values + pub fn is_null_page(&self, idx: usize) -> bool { + self.null_pages[idx] + } +} + +/// Column index for primitive types +#[derive(Debug, Clone, PartialEq)] +pub struct PrimitiveColumnIndex<T> { + pub(crate) column_index: ColumnIndex, + pub(crate) min_values: Vec<T>, + pub(crate) max_values: Vec<T>, +} + +impl<T: ParquetValueType> PrimitiveColumnIndex<T> { + pub(super) fn try_new(index: ThriftColumnIndex) -> Result<Self> { + let len = index.null_pages.len(); + + let mut min_values = Vec::with_capacity(len); + let mut max_values = Vec::with_capacity(len); + + for (i, is_null) in index.null_pages.iter().enumerate().take(len) { + if !is_null { + let min = index.min_values[i]; + min_values.push(T::try_from_le_slice(min)?); + + let max = index.max_values[i]; + max_values.push(T::try_from_le_slice(max)?); + } else { + // need placeholders + min_values.push(Default::default()); + max_values.push(Default::default()); + } + } + + Ok(Self { + column_index: ColumnIndex { + null_pages: index.null_pages, + boundary_order: index.boundary_order, + null_counts: index.null_counts, + repetition_level_histograms: index.repetition_level_histograms, + definition_level_histograms: index.definition_level_histograms, + }, + min_values, + max_values, + }) + } + + pub(crate) fn to_thrift(&self) -> crate::format::ColumnIndex { + let min_values = self + .min_values + .iter() + .map(|x| x.as_bytes().to_vec()) + .collect::<Vec<_>>(); + + let max_values = self + .max_values + .iter() + .map(|x| x.as_bytes().to_vec()) + .collect::<Vec<_>>(); + + let null_counts = self.null_counts.clone(); + let repetition_level_histograms = self.repetition_level_histograms.clone(); + let definition_level_histograms = self.definition_level_histograms.clone(); + let null_pages = self.null_pages.clone(); + + crate::format::ColumnIndex::new(
null_pages, + min_values, + max_values, + self.boundary_order.into(), + null_counts, + repetition_level_histograms, + definition_level_histograms, + ) + } +} + +impl<T> PrimitiveColumnIndex<T> { + /// Returns an array containing the min values for each page. + /// + /// Values in the returned slice are only valid if [`ColumnIndex::is_null_page()`] + /// is `false` for the same index. + pub fn min_values(&self) -> &[T] { + &self.min_values + } + + /// Returns an array containing the max values for each page. + /// + /// Values in the returned slice are only valid if [`ColumnIndex::is_null_page()`] + /// is `false` for the same index. + pub fn max_values(&self) -> &[T] { + &self.max_values + } + + /// Returns an iterator over the min values. + /// + /// Values may be `None` when [`ColumnIndex::is_null_page()`] is `true`. + pub fn min_values_iter(&self) -> impl Iterator<Item = Option<&T>> { + self.min_values.iter().enumerate().map(|(i, min)| { + if self.is_null_page(i) { + None + } else { + Some(min) + } + }) + } + + /// Returns an iterator over the max values. + /// + /// Values may be `None` when [`ColumnIndex::is_null_page()`] is `true`. + pub fn max_values_iter(&self) -> impl Iterator<Item = Option<&T>> { + self.max_values.iter().enumerate().map(|(i, max)| { + if self.is_null_page(i) { + None + } else { + Some(max) + } + }) + } + + /// Returns the min value for the page indexed by `idx` + /// + /// It is `None` when all values are null + pub fn min_value(&self, idx: usize) -> Option<&T> { + if self.null_pages[idx] { + None + } else { + Some(&self.min_values[idx]) + } + } + + /// Returns the max value for the page indexed by `idx` + /// + /// It is `None` when all values are null + pub fn max_value(&self, idx: usize) -> Option<&T> { + if self.null_pages[idx] { + None + } else { + Some(&self.max_values[idx]) + } + } +} + +impl<T> Deref for PrimitiveColumnIndex<T> { + type Target = ColumnIndex; + + fn deref(&self) -> &Self::Target { + &self.column_index + } +} + +/// Column index for byte arrays (fixed length and variable) +#[derive(Debug, Clone, PartialEq)] +pub struct ByteArrayColumnIndex { + pub(crate) column_index: ColumnIndex, + // raw bytes for min and max values + pub(crate) min_bytes: Vec<u8>, + pub(crate) min_offsets: Vec<usize>, + pub(crate) max_bytes: Vec<u8>, + pub(crate) max_offsets: Vec<usize>, +} + +impl ByteArrayColumnIndex { + pub(super) fn try_new(index: ThriftColumnIndex) -> Result<Self> { + let len = index.null_pages.len(); + + let min_len = index.min_values.iter().map(|&v| v.len()).sum(); + let max_len = index.max_values.iter().map(|&v| v.len()).sum(); + let mut min_bytes = vec![0u8; min_len]; + let mut max_bytes = vec![0u8; max_len]; + + let mut min_offsets = vec![0usize; len + 1]; + let mut max_offsets = vec![0usize; len + 1]; + + let mut min_pos = 0; + let mut max_pos = 0; + + for (i, is_null) in index.null_pages.iter().enumerate().take(len) { + if !is_null { + let min = index.min_values[i]; + let dst = &mut min_bytes[min_pos..min_pos + min.len()]; + dst.copy_from_slice(min); + min_offsets[i] = min_pos; + min_pos += min.len(); + + let max = index.max_values[i]; + let dst = &mut max_bytes[max_pos..max_pos + max.len()]; + dst.copy_from_slice(max); + max_offsets[i] = max_pos; + max_pos += max.len(); + } else { + min_offsets[i] = min_pos; + max_offsets[i] = max_pos; + } + } + + min_offsets[len] = min_pos; + max_offsets[len] = max_pos; + + Ok(Self { + column_index: ColumnIndex { + null_pages: index.null_pages, + boundary_order: index.boundary_order, + null_counts: index.null_counts, + repetition_level_histograms: index.repetition_level_histograms,
+ definition_level_histograms: index.definition_level_histograms, + }, + + min_bytes, + min_offsets, + max_bytes, + max_offsets, + }) + } + + /// Returns the min value for the page indexed by `idx` + /// + /// It is `None` when all values are null + pub fn min_value(&self, idx: usize) -> Option<&[u8]> { + if self.null_pages[idx] { + None + } else { + let start = self.min_offsets[idx]; + let end = self.min_offsets[idx + 1]; + Some(&self.min_bytes[start..end]) + } + } + + /// Returns the max value for the page indexed by `idx` + /// + /// It is `None` when all values are null + pub fn max_value(&self, idx: usize) -> Option<&[u8]> { + if self.null_pages[idx] { + None + } else { + let start = self.max_offsets[idx]; + let end = self.max_offsets[idx + 1]; + Some(&self.max_bytes[start..end]) + } + } + + /// Returns an iterator over the min values. + /// + /// Values may be `None` when [`ColumnIndex::is_null_page()`] is `true`. + pub fn min_values_iter(&self) -> impl Iterator<Item = Option<&[u8]>> { + (0..self.num_pages() as usize).map(|i| { + if self.is_null_page(i) { + None + } else { + self.min_value(i) + } + }) + } + + /// Returns an iterator over the max values. + /// + /// Values may be `None` when [`ColumnIndex::is_null_page()`] is `true`. + pub fn max_values_iter(&self) -> impl Iterator<Item = Option<&[u8]>> { + (0..self.num_pages() as usize).map(|i| { + if self.is_null_page(i) { + None + } else { + self.max_value(i) + } + }) + } + + pub(crate) fn to_thrift(&self) -> crate::format::ColumnIndex { + let mut min_values = Vec::with_capacity(self.num_pages() as usize); + for i in 0..self.num_pages() as usize { + min_values.push(self.min_value(i).unwrap_or(&[]).to_owned()); + } + + let mut max_values = Vec::with_capacity(self.num_pages() as usize); + for i in 0..self.num_pages() as usize { + max_values.push(self.max_value(i).unwrap_or(&[]).to_owned()); + } + + let null_counts = self.null_counts.clone(); + let repetition_level_histograms = self.repetition_level_histograms.clone(); + let definition_level_histograms = self.definition_level_histograms.clone(); + let null_pages = self.null_pages.clone(); + + crate::format::ColumnIndex::new( + null_pages, + min_values, + max_values, + self.boundary_order.into(), + null_counts, + repetition_level_histograms, + definition_level_histograms, + ) + } +} + +impl Deref for ByteArrayColumnIndex { + type Target = ColumnIndex; + + fn deref(&self) -> &Self::Target { + &self.column_index + } +} + +// Macro to generate getter functions for ColumnIndexMetaData. +macro_rules!
colidx_enum_func { + ($self:ident, $func:ident, $arg:ident) => {{ + match *$self { + Self::BOOLEAN(ref typed) => typed.$func($arg), + Self::INT32(ref typed) => typed.$func($arg), + Self::INT64(ref typed) => typed.$func($arg), + Self::INT96(ref typed) => typed.$func($arg), + Self::FLOAT(ref typed) => typed.$func($arg), + Self::DOUBLE(ref typed) => typed.$func($arg), + Self::BYTE_ARRAY(ref typed) => typed.$func($arg), + Self::FIXED_LEN_BYTE_ARRAY(ref typed) => typed.$func($arg), + _ => panic!(concat!( + "Cannot call ", + stringify!($func), + " on ColumnIndexMetaData::NONE" + )), + } + }}; + ($self:ident, $func:ident) => {{ + match *$self { + Self::BOOLEAN(ref typed) => typed.$func(), + Self::INT32(ref typed) => typed.$func(), + Self::INT64(ref typed) => typed.$func(), + Self::INT96(ref typed) => typed.$func(), + Self::FLOAT(ref typed) => typed.$func(), + Self::DOUBLE(ref typed) => typed.$func(), + Self::BYTE_ARRAY(ref typed) => typed.$func(), + Self::FIXED_LEN_BYTE_ARRAY(ref typed) => typed.$func(), + _ => panic!(concat!( + "Cannot call ", + stringify!($func), + " on ColumnIndexMetaData::NONE" + )), + } + }}; +} + +/// Decoded column index information for one column chunk +#[derive(Debug, Clone, PartialEq)] +#[allow(non_camel_case_types)] +pub enum ColumnIndexMetaData { + /// Sometimes reading the page index from a parquet file + /// will only return page locations without a min/max index; + /// `NONE` represents this lack of index information + NONE, + /// Boolean type index + BOOLEAN(PrimitiveColumnIndex<bool>), + /// 32-bit integer type index + INT32(PrimitiveColumnIndex<i32>), + /// 64-bit integer type index + INT64(PrimitiveColumnIndex<i64>), + /// 96-bit integer type (timestamp) index + INT96(PrimitiveColumnIndex<Int96>), + /// 32-bit floating point type index + FLOAT(PrimitiveColumnIndex<f32>), + /// 64-bit floating point type index + DOUBLE(PrimitiveColumnIndex<f64>), + /// Byte array type index + BYTE_ARRAY(ByteArrayColumnIndex), + /// Fixed length byte array type index + FIXED_LEN_BYTE_ARRAY(ByteArrayColumnIndex), +} + +impl ColumnIndexMetaData { + /// Returns whether the min/max elements inside the ColumnIndex are ordered. + pub fn is_sorted(&self) -> bool { + // 0:UNORDERED, 1:ASCENDING ,2:DESCENDING, + if let Some(order) = self.get_boundary_order() { + order != BoundaryOrder::UNORDERED + } else { + false + } + } + + /// Get boundary_order of this page index. + pub fn get_boundary_order(&self) -> Option<BoundaryOrder> { + match self { + Self::NONE => None, + Self::BOOLEAN(index) => Some(index.boundary_order), + Self::INT32(index) => Some(index.boundary_order), + Self::INT64(index) => Some(index.boundary_order), + Self::INT96(index) => Some(index.boundary_order), + Self::FLOAT(index) => Some(index.boundary_order), + Self::DOUBLE(index) => Some(index.boundary_order), + Self::BYTE_ARRAY(index) => Some(index.boundary_order), + Self::FIXED_LEN_BYTE_ARRAY(index) => Some(index.boundary_order), + } + } + + /// Returns array of null counts, one per page.
+ /// + /// Returns `None` if no null counts have been set in the index + pub fn null_counts(&self) -> Option<&Vec<i64>> { + match self { + Self::NONE => None, + Self::BOOLEAN(index) => index.null_counts.as_ref(), + Self::INT32(index) => index.null_counts.as_ref(), + Self::INT64(index) => index.null_counts.as_ref(), + Self::INT96(index) => index.null_counts.as_ref(), + Self::FLOAT(index) => index.null_counts.as_ref(), + Self::DOUBLE(index) => index.null_counts.as_ref(), + Self::BYTE_ARRAY(index) => index.null_counts.as_ref(), + Self::FIXED_LEN_BYTE_ARRAY(index) => index.null_counts.as_ref(), + } + } + + /// Returns the number of pages + pub fn num_pages(&self) -> u64 { + colidx_enum_func!(self, num_pages) + } + + /// Returns the number of null values in the page indexed by `idx` + /// + /// Returns `None` if no null counts have been set in the index + pub fn null_count(&self, idx: usize) -> Option<i64> { + colidx_enum_func!(self, null_count, idx) + } + + /// Returns the repetition level histogram for the page indexed by `idx` + pub fn repetition_level_histogram(&self, idx: usize) -> Option<&[i64]> { + colidx_enum_func!(self, repetition_level_histogram, idx) + } + + /// Returns the definition level histogram for the page indexed by `idx` + pub fn definition_level_histogram(&self, idx: usize) -> Option<&[i64]> { + colidx_enum_func!(self, definition_level_histogram, idx) + } + + /// Returns whether the page indexed by `idx` consists of all null values + pub fn is_null_page(&self, idx: usize) -> bool { + colidx_enum_func!(self, is_null_page, idx) + } +} + +/// Provides iterators over min and max values of a [`ColumnIndexMetaData`] +pub trait ColumnIndexIterators { + /// Can be one of `bool`, `i32`, `i64`, `Int96`, `f32`, `f64`, [`ByteArray`], + /// or [`FixedLenByteArray`] + type Item; + + /// Return iterator over the min values for the index + fn min_values_iter(colidx: &ColumnIndexMetaData) -> impl Iterator<Item = Option<Self::Item>>; + + /// Return iterator over the max values for the index + fn max_values_iter(colidx: &ColumnIndexMetaData) -> impl Iterator<Item = Option<Self::Item>>; +} + +macro_rules!
column_index_iters { + ($item: ident, $variant: ident, $conv:expr) => { + impl ColumnIndexIterators for $item { + type Item = $item; + + fn min_values_iter( + colidx: &ColumnIndexMetaData, + ) -> impl Iterator<Item = Option<Self::Item>> { + if let ColumnIndexMetaData::$variant(index) = colidx { + index.min_values_iter().map($conv) + } else { + panic!(concat!("Wrong type for ", stringify!($item), " iterator")) + } + } + + fn max_values_iter( + colidx: &ColumnIndexMetaData, + ) -> impl Iterator<Item = Option<Self::Item>> { + if let ColumnIndexMetaData::$variant(index) = colidx { + index.max_values_iter().map($conv) + } else { + panic!(concat!("Wrong type for ", stringify!($item), " iterator")) + } + } + } + }; +} + +column_index_iters!(bool, BOOLEAN, |v| v.copied()); +column_index_iters!(i32, INT32, |v| v.copied()); +column_index_iters!(i64, INT64, |v| v.copied()); +column_index_iters!(Int96, INT96, |v| v.copied()); +column_index_iters!(f32, FLOAT, |v| v.copied()); +column_index_iters!(f64, DOUBLE, |v| v.copied()); +column_index_iters!(ByteArray, BYTE_ARRAY, |v| v + .map(|v| ByteArray::from(v.to_owned()))); +column_index_iters!(FixedLenByteArray, FIXED_LEN_BYTE_ARRAY, |v| v + .map(|v| FixedLenByteArray::from(v.to_owned()))); diff --git a/parquet/src/file/page_index/index.rs b/parquet/src/file/page_index/index.rs index ed586bcd33d0..861dc0c3b04e 100644 --- a/parquet/src/file/page_index/index.rs +++ b/parquet/src/file/page_index/index.rs @@ -24,7 +24,7 @@ use crate::data_type::private::ParquetValueType; use crate::data_type::{AsBytes, ByteArray, FixedLenByteArray, Int96}; use crate::errors::ParquetError; use crate::file::metadata::LevelHistogram; -use crate::file::page_index::index_reader::ColumnIndex; +use crate::file::page_index::index_reader::ThriftColumnIndex; use std::fmt::Debug; /// Typed statistics for one data page @@ -310,7 +310,8 @@ impl<T: ParquetValueType> NativeIndex<T> { } /// Creates a new [`NativeIndex`] - pub(crate) fn try_new_local(index: ColumnIndex) -> Result<Self> { + #[allow(dead_code)] + pub(super) fn try_new_local(index: ThriftColumnIndex) -> Result<Self> { let len = index.min_values.len(); // turn Option<Vec<i64>> into Vec<Option<i64>> diff --git a/parquet/src/file/page_index/index_reader.rs b/parquet/src/file/page_index/index_reader.rs index fbe6d3984596..f35241689e1c 100644 --- a/parquet/src/file/page_index/index_reader.rs +++ b/parquet/src/file/page_index/index_reader.rs @@ -15,13 +15,15 @@ // specific language governing permissions and limitations // under the License. -//! Support for reading [`Index`] and [`OffsetIndexMetaData`] from parquet metadata. +//! Support for reading [`ColumnIndexMetaData`] and [`OffsetIndexMetaData`] from parquet metadata. use crate::basic::{BoundaryOrder, Type}; use crate::data_type::Int96; use crate::errors::{ParquetError, Result}; use crate::file::metadata::ColumnChunkMetaData; -use crate::file::page_index::index::{Index, NativeIndex}; +use crate::file::page_index::column_index::{ + ByteArrayColumnIndex, ColumnIndexMetaData, PrimitiveColumnIndex, +}; use crate::file::page_index::offset_index::OffsetIndexMetaData; use crate::file::reader::ChunkReader; use crate::parquet_thrift::{FieldType, ThriftCompactInputProtocol}; @@ -38,7 +40,7 @@ pub(crate) fn acc_range(a: Option<Range<u64>>, b: Option<Range<u64>>) -> Option< } } -/// Reads per-column [`Index`] for all columns of a row group by +/// Reads per-column [`ColumnIndexMetaData`] for all columns of a row group by /// decoding [`ColumnIndex`] . /// /// Returns a vector of `index[column_number]`.
@@ -56,7 +58,7 @@ pub(crate) fn acc_range(a: Option<Range<u64>>, b: Option<Range<u64>>) -> Option< pub fn read_columns_indexes<R: ChunkReader>( reader: &R, chunks: &[ColumnChunkMetaData], -) -> Result<Option<Vec<Index>>, ParquetError> { +) -> Result<Option<Vec<ColumnIndexMetaData>>, ParquetError> { let fetch = chunks .iter() .fold(None, |range, c| acc_range(range, c.column_index_range())); @@ -77,7 +79,7 @@ pub fn read_columns_indexes( ..usize::try_from(r.end - fetch.start)?], c.column_type(), ), - None => Ok(Index::NONE), + None => Ok(ColumnIndexMetaData::NONE), }) .collect(), ) @@ -134,8 +136,9 @@ pub(crate) fn decode_offset_index(data: &[u8]) -> Result<OffsetIndexMetaData> { +pub(super) struct ThriftColumnIndex<'a> { 1: required list<bool> null_pages 2: required list<binary<'a>> min_values 3: required list<binary<'a>> max_values @@ -146,20 +149,25 @@ pub(crate) struct ColumnIndex<'a> { } ); -pub(crate) fn decode_column_index(data: &[u8], column_type: Type) -> Result<Index> { +pub(crate) fn decode_column_index( + data: &[u8], + column_type: Type, +) -> Result<ColumnIndexMetaData> { let mut prot = ThriftCompactInputProtocol::new(data); - let index = ColumnIndex::try_from(&mut prot)?; + let index = ThriftColumnIndex::try_from(&mut prot)?; let index = match column_type { - Type::BOOLEAN => Index::BOOLEAN(NativeIndex::<bool>::try_new_local(index)?), - Type::INT32 => Index::INT32(NativeIndex::<i32>::try_new_local(index)?), - Type::INT64 => Index::INT64(NativeIndex::<i64>::try_new_local(index)?), - Type::INT96 => Index::INT96(NativeIndex::<Int96>::try_new_local(index)?), - Type::FLOAT => Index::FLOAT(NativeIndex::<f32>::try_new_local(index)?), - Type::DOUBLE => Index::DOUBLE(NativeIndex::<f64>::try_new_local(index)?), - Type::BYTE_ARRAY => Index::BYTE_ARRAY(NativeIndex::try_new_local(index)?), + Type::BOOLEAN => { + ColumnIndexMetaData::BOOLEAN(PrimitiveColumnIndex::<bool>::try_new(index)?) + } + Type::INT32 => ColumnIndexMetaData::INT32(PrimitiveColumnIndex::<i32>::try_new(index)?), + Type::INT64 => ColumnIndexMetaData::INT64(PrimitiveColumnIndex::<i64>::try_new(index)?), + Type::INT96 => ColumnIndexMetaData::INT96(PrimitiveColumnIndex::<Int96>::try_new(index)?), + Type::FLOAT => ColumnIndexMetaData::FLOAT(PrimitiveColumnIndex::<f32>::try_new(index)?), + Type::DOUBLE => ColumnIndexMetaData::DOUBLE(PrimitiveColumnIndex::<f64>::try_new(index)?), + Type::BYTE_ARRAY => ColumnIndexMetaData::BYTE_ARRAY(ByteArrayColumnIndex::try_new(index)?), Type::FIXED_LEN_BYTE_ARRAY => { - Index::FIXED_LEN_BYTE_ARRAY(NativeIndex::try_new_local(index)?) + ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(ByteArrayColumnIndex::try_new(index)?) } }; diff --git a/parquet/src/file/page_index/mod.rs b/parquet/src/file/page_index/mod.rs index a8077896db34..ff70e2eca5dd 100644 --- a/parquet/src/file/page_index/mod.rs +++ b/parquet/src/file/page_index/mod.rs @@ -19,6 +19,7 @@ //! //!
[Column Index]: https://github.com/apache/parquet-format/blob/master/PageIndex.md +pub mod column_index; pub mod index; pub mod index_reader; pub mod offset_index; diff --git a/parquet/src/file/serialized_reader.rs b/parquet/src/file/serialized_reader.rs index bead048ee20f..5308825b0976 100644 --- a/parquet/src/file/serialized_reader.rs +++ b/parquet/src/file/serialized_reader.rs @@ -1102,13 +1102,15 @@ mod tests { use bytes::Buf; + use crate::file::page_index::column_index::{ + ByteArrayColumnIndex, ColumnIndexMetaData, PrimitiveColumnIndex, + }; use crate::file::properties::{EnabledStatistics, WriterProperties}; use crate::basic::{self, BoundaryOrder, ColumnOrder, SortOrder}; use crate::column::reader::ColumnReader; use crate::data_type::private::ParquetValueType; use crate::data_type::{AsBytes, FixedLenByteArrayType, Int32Type}; - use crate::file::page_index::index::{Index, NativeIndex}; #[allow(deprecated)] use crate::file::page_index::index_reader::{read_columns_indexes, read_offset_indexes}; use crate::file::writer::SerializedFileWriter; @@ -1912,21 +1914,19 @@ mod tests { // only one row group assert_eq!(column_index.len(), 1); - let index = if let Index::BYTE_ARRAY(index) = &column_index[0][0] { + let index = if let ColumnIndexMetaData::BYTE_ARRAY(index) = &column_index[0][0] { index } else { unreachable!() }; assert_eq!(index.boundary_order, BoundaryOrder::ASCENDING); - let index_in_pages = &index.indexes; //only one page group - assert_eq!(index_in_pages.len(), 1); + assert_eq!(index.num_pages(), 1); - let page0 = &index_in_pages[0]; - let min = page0.min.as_ref().unwrap(); - let max = page0.max.as_ref().unwrap(); + let min = index.min_value(0).unwrap(); + let max = index.max_value(0).unwrap(); assert_eq!(b"Hello", min.as_bytes()); assert_eq!(b"today", max.as_bytes()); @@ -1991,7 +1991,7 @@ mod tests { let boundary_order = &column_index[0][0].get_boundary_order(); assert!(boundary_order.is_some()); matches!(boundary_order.unwrap(), BoundaryOrder::UNORDERED); - if let Index::INT32(index) = &column_index[0][0] { + if let ColumnIndexMetaData::INT32(index) = &column_index[0][0] { check_native_page_index( index, 325, @@ -2004,15 +2004,15 @@ mod tests { }; //col1->bool_col:BOOLEAN UNCOMPRESSED DO:0 FPO:37329 SZ:3022/3022/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: false, max: true, num_nulls: 0] assert!(&column_index[0][1].is_sorted()); - if let Index::BOOLEAN(index) = &column_index[0][1] { - assert_eq!(index.indexes.len(), 82); + if let ColumnIndexMetaData::BOOLEAN(index) = &column_index[0][1] { + assert_eq!(index.num_pages(), 82); assert_eq!(row_group_offset_indexes[1].page_locations.len(), 82); } else { unreachable!() }; //col2->tinyint_col: INT32 UNCOMPRESSED DO:0 FPO:40351 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 9, num_nulls: 0] assert!(&column_index[0][2].is_sorted()); - if let Index::INT32(index) = &column_index[0][2] { + if let ColumnIndexMetaData::INT32(index) = &column_index[0][2] { check_native_page_index( index, 325, @@ -2025,7 +2025,7 @@ mod tests { }; //col4->smallint_col: INT32 UNCOMPRESSED DO:0 FPO:77676 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 9, num_nulls: 0] assert!(&column_index[0][3].is_sorted()); - if let Index::INT32(index) = &column_index[0][3] { + if let ColumnIndexMetaData::INT32(index) = &column_index[0][3] { check_native_page_index( index, 325, @@ -2038,7 +2038,7 @@ mod tests { }; //col5->smallint_col: INT32 UNCOMPRESSED DO:0 FPO:77676 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 
0, max: 9, num_nulls: 0] assert!(&column_index[0][4].is_sorted()); - if let Index::INT32(index) = &column_index[0][4] { + if let ColumnIndexMetaData::INT32(index) = &column_index[0][4] { check_native_page_index( index, 325, @@ -2051,7 +2051,7 @@ mod tests { }; //col6->bigint_col: INT64 UNCOMPRESSED DO:0 FPO:152326 SZ:71598/71598/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 90, num_nulls: 0] assert!(!&column_index[0][5].is_sorted()); - if let Index::INT64(index) = &column_index[0][5] { + if let ColumnIndexMetaData::INT64(index) = &column_index[0][5] { check_native_page_index( index, 528, @@ -2064,7 +2064,7 @@ mod tests { }; //col7->float_col: FLOAT UNCOMPRESSED DO:0 FPO:223924 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: -0.0, max: 9.9, num_nulls: 0] assert!(&column_index[0][6].is_sorted()); - if let Index::FLOAT(index) = &column_index[0][6] { + if let ColumnIndexMetaData::FLOAT(index) = &column_index[0][6] { check_native_page_index( index, 325, @@ -2077,7 +2077,7 @@ mod tests { }; //col8->double_col: DOUBLE UNCOMPRESSED DO:0 FPO:261249 SZ:71598/71598/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: -0.0, max: 90.89999999999999, num_nulls: 0] assert!(!&column_index[0][7].is_sorted()); - if let Index::DOUBLE(index) = &column_index[0][7] { + if let ColumnIndexMetaData::DOUBLE(index) = &column_index[0][7] { check_native_page_index( index, 528, @@ -2090,8 +2090,8 @@ mod tests { }; //col9->date_string_col: BINARY UNCOMPRESSED DO:0 FPO:332847 SZ:111948/111948/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 01/01/09, max: 12/31/10, num_nulls: 0] assert!(!&column_index[0][8].is_sorted()); - if let Index::BYTE_ARRAY(index) = &column_index[0][8] { - check_native_page_index( + if let ColumnIndexMetaData::BYTE_ARRAY(index) = &column_index[0][8] { + check_byte_array_page_index( index, 974, get_row_group_min_max_bytes(row_group_metadata, 8), @@ -2103,8 +2103,8 @@ mod tests { }; //col10->string_col: BINARY UNCOMPRESSED DO:0 FPO:444795 SZ:45298/45298/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 0, max: 9, num_nulls: 0] assert!(&column_index[0][9].is_sorted()); - if let Index::BYTE_ARRAY(index) = &column_index[0][9] { - check_native_page_index( + if let ColumnIndexMetaData::BYTE_ARRAY(index) = &column_index[0][9] { + check_byte_array_page_index( index, 352, get_row_group_min_max_bytes(row_group_metadata, 9), @@ -2117,14 +2117,14 @@ mod tests { //col11->timestamp_col: INT96 UNCOMPRESSED DO:0 FPO:490093 SZ:111948/111948/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[num_nulls: 0, min/max not defined] //Notice: min_max values for each page for this col not exits. 
assert!(!&column_index[0][10].is_sorted()); - if let Index::NONE = &column_index[0][10] { + if let ColumnIndexMetaData::NONE = &column_index[0][10] { assert_eq!(row_group_offset_indexes[10].page_locations.len(), 974); } else { unreachable!() }; //col12->year: INT32 UNCOMPRESSED DO:0 FPO:602041 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 2009, max: 2010, num_nulls: 0] assert!(&column_index[0][11].is_sorted()); - if let Index::INT32(index) = &column_index[0][11] { + if let ColumnIndexMetaData::INT32(index) = &column_index[0][11] { check_native_page_index( index, 325, @@ -2137,7 +2137,7 @@ }; //col13->month: INT32 UNCOMPRESSED DO:0 FPO:639366 SZ:37325/37325/1.00 VC:7300 ENC:BIT_PACKED,RLE,PLAIN ST:[min: 1, max: 12, num_nulls: 0] assert!(!&column_index[0][12].is_sorted()); - if let Index::INT32(index) = &column_index[0][12] { + if let ColumnIndexMetaData::INT32(index) = &column_index[0][12] { check_native_page_index( index, 325, @@ -2151,17 +2151,31 @@ } fn check_native_page_index<T: ParquetValueType>( - row_group_index: &NativeIndex<T>, + row_group_index: &PrimitiveColumnIndex<T>, page_size: usize, min_max: (&[u8], &[u8]), boundary_order: BoundaryOrder, ) { - assert_eq!(row_group_index.indexes.len(), page_size); + assert_eq!(row_group_index.num_pages() as usize, page_size); assert_eq!(row_group_index.boundary_order, boundary_order); - row_group_index.indexes.iter().all(|x| { - x.min.as_ref().unwrap() >= &T::try_from_le_slice(min_max.0).unwrap() - && x.max.as_ref().unwrap() <= &T::try_from_le_slice(min_max.1).unwrap() - }); + assert!(row_group_index.min_values().iter().all(|x| { + x >= &T::try_from_le_slice(min_max.0).unwrap() + && x <= &T::try_from_le_slice(min_max.1).unwrap() + })); + } + + fn check_byte_array_page_index( + row_group_index: &ByteArrayColumnIndex, + page_size: usize, + min_max: (&[u8], &[u8]), + boundary_order: BoundaryOrder, + ) { + assert_eq!(row_group_index.num_pages() as usize, page_size); + assert_eq!(row_group_index.boundary_order, boundary_order); + for i in 0..row_group_index.num_pages() as usize { + let x = row_group_index.min_value(i).unwrap(); + assert!(x >= min_max.0 && x <= min_max.1); + } } fn get_row_group_min_max_bytes(r: &RowGroupMetaData, col_num: usize) -> (&[u8], &[u8]) { @@ -2402,12 +2416,11 @@ assert_eq!(c.len(), 1); match &c[0] { - Index::FIXED_LEN_BYTE_ARRAY(v) => { - assert_eq!(v.indexes.len(), 1); - let page_idx = &v.indexes[0]; - assert_eq!(page_idx.null_count.unwrap(), 1); - assert_eq!(page_idx.min.as_ref().unwrap().as_ref(), &[0; 11]); - assert_eq!(page_idx.max.as_ref().unwrap().as_ref(), &[5; 11]); + ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(v) => { + assert_eq!(v.num_pages(), 1); + assert_eq!(v.null_count(0).unwrap(), 1); + assert_eq!(v.min_value(0).unwrap(), &[0; 11]); + assert_eq!(v.max_value(0).unwrap(), &[5; 11]); } _ => unreachable!(), } @@ -2538,11 +2551,11 @@ // test that we got the index matching the row group match pg_idx { - Index::INT32(int_idx) => { + ColumnIndexMetaData::INT32(int_idx) => { let min = col_stats.min_bytes_opt().unwrap().get_i32_le(); let max = col_stats.max_bytes_opt().unwrap().get_i32_le(); - assert_eq!(int_idx.indexes[0].min(), Some(min).as_ref()); - assert_eq!(int_idx.indexes[0].max(), Some(max).as_ref()); + assert_eq!(int_idx.min_value(0), Some(min).as_ref()); + assert_eq!(int_idx.max_value(0), Some(max).as_ref()); } _ => panic!("wrong stats type"), } @@ -2583,11 +2596,11 @@ // test that we got the index matching the row group match pg_idx { -
Index::INT32(int_idx) => { + ColumnIndexMetaData::INT32(int_idx) => { let min = col_stats.min_bytes_opt().unwrap().get_i32_le(); let max = col_stats.max_bytes_opt().unwrap().get_i32_le(); - assert_eq!(int_idx.indexes[0].min(), Some(min).as_ref()); - assert_eq!(int_idx.indexes[0].max(), Some(max).as_ref()); + assert_eq!(int_idx.min_value(0), Some(min).as_ref()); + assert_eq!(int_idx.max_value(0), Some(max).as_ref()); } _ => panic!("wrong stats type"), } diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs index 7db517ced5b2..65b96246ea03 100644 --- a/parquet/src/file/writer.rs +++ b/parquet/src/file/writer.rs @@ -1062,7 +1062,7 @@ mod tests { use crate::column::reader::get_typed_column_reader; use crate::compression::{create_codec, Codec, CodecOptionsBuilder}; use crate::data_type::{BoolType, ByteArrayType, Int32Type}; - use crate::file::page_index::index::Index; + use crate::file::page_index::column_index::ColumnIndexMetaData; use crate::file::properties::EnabledStatistics; use crate::file::serialized_reader::ReadOptionsBuilder; use crate::file::{ @@ -2083,9 +2083,9 @@ mod tests { assert_eq!(column_index[0].len(), 2); // 2 column let a_idx = &column_index[0][0]; - assert!(matches!(a_idx, Index::INT32(_)), "{a_idx:?}"); + assert!(matches!(a_idx, ColumnIndexMetaData::INT32(_)), "{a_idx:?}"); let b_idx = &column_index[0][1]; - assert!(matches!(b_idx, Index::NONE), "{b_idx:?}"); + assert!(matches!(b_idx, ColumnIndexMetaData::NONE), "{b_idx:?}"); } #[test] @@ -2169,16 +2169,16 @@ mod tests { let column_index = reader.metadata().column_index().unwrap(); assert_eq!(column_index.len(), 1); assert_eq!(column_index[0].len(), 1); - let col_idx = if let Index::BYTE_ARRAY(index) = &column_index[0][0] { - assert_eq!(index.indexes.len(), 1); - &index.indexes[0] + let col_idx = if let ColumnIndexMetaData::BYTE_ARRAY(index) = &column_index[0][0] { + assert_eq!(index.num_pages(), 1); + index } else { unreachable!() }; - assert!(col_idx.repetition_level_histogram().is_none()); - assert!(col_idx.definition_level_histogram().is_some()); - check_def_hist(col_idx.definition_level_histogram().unwrap().values()); + assert!(col_idx.repetition_level_histogram(0).is_none()); + assert!(col_idx.definition_level_histogram(0).is_some()); + check_def_hist(col_idx.definition_level_histogram(0).unwrap()); assert!(reader.metadata().offset_index().is_some()); let offset_index = reader.metadata().offset_index().unwrap(); @@ -2324,15 +2324,15 @@ mod tests { let column_index = reader.metadata().column_index().unwrap(); assert_eq!(column_index.len(), 1); assert_eq!(column_index[0].len(), 1); - let col_idx = if let Index::INT32(index) = &column_index[0][0] { - assert_eq!(index.indexes.len(), 1); - &index.indexes[0] + let col_idx = if let ColumnIndexMetaData::INT32(index) = &column_index[0][0] { + assert_eq!(index.num_pages(), 1); + index } else { unreachable!() }; - check_def_hist(col_idx.definition_level_histogram().unwrap().values()); - check_rep_hist(col_idx.repetition_level_histogram().unwrap().values()); + check_def_hist(col_idx.definition_level_histogram(0).unwrap()); + check_rep_hist(col_idx.repetition_level_histogram(0).unwrap()); assert!(reader.metadata().offset_index().is_some()); let offset_index = reader.metadata().offset_index().unwrap(); diff --git a/parquet/tests/arrow_reader/io/mod.rs b/parquet/tests/arrow_reader/io/mod.rs index b31f295755b0..9cafcd714e89 100644 --- a/parquet/tests/arrow_reader/io/mod.rs +++ b/parquet/tests/arrow_reader/io/mod.rs @@ -49,7 +49,6 @@ use parquet::data_type::AsBytes; 
use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader, ParquetOffsetIndex}; use parquet::file::properties::WriterProperties; use parquet::file::FOOTER_SIZE; -use parquet::format::PageLocation; use parquet::schema::types::SchemaDescriptor; use std::collections::BTreeMap; use std::fmt::Display; @@ -257,7 +256,7 @@ struct TestColumnChunk { dictionary_page_location: Option<i64>, /// The location of the data pages in the file - page_locations: Vec<PageLocation>, + page_locations: Vec<parquet::format::PageLocation>, } /// Information about the pages in a single row group @@ -287,8 +286,11 @@ impl TestRowGroups { .enumerate() .map(|(col_idx, col_meta)| { let column_name = col_meta.column_descr().name().to_string(); - let page_locations = - offset_index[rg_index][col_idx].page_locations().to_vec(); + let page_locations = offset_index[rg_index][col_idx] + .page_locations() + .iter() + .map(parquet::format::PageLocation::from) + .collect(); let dictionary_page_location = col_meta.dictionary_page_offset(); // We can find the byte range of the entire column chunk diff --git a/parquet/tests/encryption/encryption_util.rs b/parquet/tests/encryption/encryption_util.rs index bf7fd08109f6..6817491b3024 100644 --- a/parquet/tests/encryption/encryption_util.rs +++ b/parquet/tests/encryption/encryption_util.rs @@ -191,11 +191,11 @@ pub(crate) fn verify_column_indexes(metadata: &ParquetMetaData) { let column_index = &column_index[0][float_col_idx]; match column_index { - parquet::file::page_index::index::Index::FLOAT(float_index) => { - assert_eq!(float_index.indexes.len(), 1); - assert_eq!(float_index.indexes[0].min, Some(0.0f32)); - assert!(float_index.indexes[0] - .max + parquet::file::page_index::column_index::ColumnIndexMetaData::FLOAT(float_index) => { + assert_eq!(float_index.num_pages(), 1); + assert_eq!(float_index.min_value(0), Some(&0.0f32)); + assert!(float_index + .max_value(0) .is_some_and(|max| (max - 53.9).abs() < 1e-6)); } _ => { From db16cb4d840a9a28324662b3e1a800e097e2db1b Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Wed, 27 Aug 2025 12:44:18 -0700 Subject: [PATCH 06/15] [thrift-remodel] Add custom `PageLocation` decoder to speed up decoding of page indexes (#8190) # Which issue does this PR close? **Note: this targets a feature branch, not main** - Part of #5854. # Rationale for this change Add a custom parser for `PageLocation`, as the decoding of this struct is one of several hot spots. # What changes are included in this PR? This adds a faster means of obtaining the struct field ids to `ThriftCompactInputProtocol`. For a small struct (3 fields) with all of them required, we can save a good bit of time bypassing `ThriftCompactInputProtocol::read_field_begin`, which is very general and can handle out-of-order fields, among other things. By adding a new function `read_field_header`, we can avoid the costly branching that occurs when calculating the new field id (as well as the special handling needed for boolean fields). Field validation is then handled on the consuming side while decoding the `PageLocation` struct. Note that to obtain the speed-up seen, we need to assume the fields will always be in order, and the field ids will all be encoded as field deltas. This is probably a fairly safe assumption, but there does exist the possibility of custom thrift writers that use absolute field ids.
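For illustration, here is a minimal sketch (not code from this PR) of the short-form field header that this fast path assumes: the field-id delta lives in the high nibble of a single byte and the compact-protocol field type in the low nibble, matching the nibble extraction done by `read_field_header` below.

```rust
// Decode a thrift compact-protocol short-form field header byte.
// High nibble: field-id delta (1-15). Low nibble: field type.
fn decode_short_form_header(byte: u8) -> (u8, u8) {
    let field_delta = (byte & 0xf0) >> 4;
    let field_type = byte & 0x0f;
    (field_type, field_delta)
}

fn main() {
    // PageLocation::offset is field 1 with type I64 (compact type id 6),
    // so an in-order writer emits the single header byte 0x16.
    assert_eq!(decode_short_form_header(0x16), (6, 1));
}
```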
If we encounter such a writer in the wild, this change will need to be reverted. # Are these changes tested? These changes should be covered by existing tests. # Are there any user-facing changes? None beyond the changes in this branch. --- parquet/src/file/page_index/index_reader.rs | 11 ++- parquet/src/file/page_index/offset_index.rs | 88 +++++++++++++++++++++ parquet/src/parquet_thrift.rs | 13 +++ parquet/tests/arrow_reader/io/mod.rs | 5 ++ 4 files changed, 116 insertions(+), 1 deletion(-) diff --git a/parquet/src/file/page_index/index_reader.rs b/parquet/src/file/page_index/index_reader.rs index f35241689e1c..99e5963b290e 100644 --- a/parquet/src/file/page_index/index_reader.rs +++ b/parquet/src/file/page_index/index_reader.rs @@ -133,7 +133,16 @@ pub fn read_offset_indexes( pub(crate) fn decode_offset_index(data: &[u8]) -> Result<OffsetIndexMetaData> { let mut prot = ThriftCompactInputProtocol::new(data); - OffsetIndexMetaData::try_from(&mut prot) + + // Try the fast path first. If that fails, fall back to the slower but more robust + // decoder. + match OffsetIndexMetaData::try_from_fast(&mut prot) { + Ok(offset_index) => Ok(offset_index), + Err(_) => { + prot = ThriftCompactInputProtocol::new(data); + OffsetIndexMetaData::try_from(&mut prot) + } + } } // private struct only used for decoding then discarded diff --git a/parquet/src/file/page_index/offset_index.rs b/parquet/src/file/page_index/offset_index.rs index d4c196a3ae8b..6cb7539cb573 100644 --- a/parquet/src/file/page_index/offset_index.rs +++ b/parquet/src/file/page_index/offset_index.rs @@ -104,4 +104,92 @@ impl OffsetIndexMetaData { self.unencoded_byte_array_data_bytes.clone(), ) } + + // Fast-path read of offset index. This works because we expect all field deltas to be 1, + // and there's no nesting beyond PageLocation, so no need to save the last field id. Like + // read_page_location(), this will fail if absolute field ids are used. + pub(super) fn try_from_fast<'a>(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> { + // Offset index is a struct with 2 fields. First field is an array of PageLocations, + // the second an optional array of i64.
+ + // read field 1 header, then list header, then vec of PageLocations + let (field_type, delta) = prot.read_field_header()?; + if delta != 1 || field_type != FieldType::List as u8 { + return Err(general_err!("error reading OffsetIndex::page_locations")); + } + + // we have to do this manually because we want to use the fast PageLocation decoder + let list_ident = prot.read_list_begin()?; + let mut page_locations = Vec::with_capacity(list_ident.size as usize); + for _ in 0..list_ident.size { + page_locations.push(read_page_location(prot)?); + } + + let mut unencoded_byte_array_data_bytes: Option<Vec<i64>> = None; + + // read second field...if it's Stop we're done + let (mut field_type, delta) = prot.read_field_header()?; + if field_type == FieldType::List as u8 { + if delta != 1 { + return Err(general_err!( + "encountered unknown field while reading OffsetIndex" + )); + } + let vec = Vec::<i64>::try_from(&mut *prot)?; + unencoded_byte_array_data_bytes = Some(vec); + + // this one should be Stop + (field_type, _) = prot.read_field_header()?; + } + + if field_type != FieldType::Stop as u8 { + return Err(general_err!( + "encountered unknown field while reading OffsetIndex" + )); + } + + Ok(Self { + page_locations, + unencoded_byte_array_data_bytes, + }) + } +} + +// hand coding this one because it is very time critical + +// Note: this will fail if the fields are either out of order, or if a suboptimal +// encoder doesn't use field deltas. +fn read_page_location<'a>(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<PageLocation> { + // there are 3 fields, all mandatory, so all field deltas should be 1 + let (field_type, delta) = prot.read_field_header()?; + if delta != 1 || field_type != FieldType::I64 as u8 { + return Err(general_err!("error reading PageLocation::offset")); + } + let offset = prot.read_i64()?; + + let (field_type, delta) = prot.read_field_header()?; + if delta != 1 || field_type != FieldType::I32 as u8 { + return Err(general_err!( + "error reading PageLocation::compressed_page_size" + )); + } + let compressed_page_size = prot.read_i32()?; + + let (field_type, delta) = prot.read_field_header()?; + if delta != 1 || field_type != FieldType::I64 as u8 { + return Err(general_err!("error reading PageLocation::first_row_index")); + } + let first_row_index = prot.read_i64()?; + + // read end of struct...return error if there are unknown fields present + let (field_type, _) = prot.read_field_header()?; + if field_type != FieldType::Stop as u8 { + return Err(general_err!("unexpected field in PageLocation")); + } + + Ok(PageLocation { + offset, + compressed_page_size, + first_row_index, + }) } diff --git a/parquet/src/parquet_thrift.rs b/parquet/src/parquet_thrift.rs index 7f5fe475217f..2dff498372f0 100644 --- a/parquet/src/parquet_thrift.rs +++ b/parquet/src/parquet_thrift.rs @@ -244,6 +244,19 @@ impl<'b, 'a: 'b> ThriftCompactInputProtocol<'a> { Ok(()) } + // This is a specialized version of read_field_begin, solely for use in parsing + // PageLocation structs in the offset index. This function assumes that the delta + // field will always be less than 0xf, fields will be in order, and no boolean fields + // will be read. This also skips validation of the field type.
+ // + // Returns a tuple of (field_type, field_delta) + pub(crate) fn read_field_header(&mut self) -> Result<(u8, u8)> { + let field_type = self.read_byte()?; + let field_delta = (field_type & 0xf0) >> 4; + let field_type = field_type & 0xf; + Ok((field_type, field_delta)) + } + pub(crate) fn read_field_begin(&mut self) -> Result { // we can read at least one byte, which is: // - the type diff --git a/parquet/tests/arrow_reader/io/mod.rs b/parquet/tests/arrow_reader/io/mod.rs index 9cafcd714e89..bfdb9467e20c 100644 --- a/parquet/tests/arrow_reader/io/mod.rs +++ b/parquet/tests/arrow_reader/io/mod.rs @@ -298,6 +298,11 @@ impl TestRowGroups { let start_offset = start_offset as usize; let end_offset = start_offset + length as usize; + let page_locations = page_locations + .iter() + .map(parquet::format::PageLocation::from) + .collect(); + TestColumnChunk { name: column_name.clone(), location: start_offset..end_offset, From 64a59c3bffe43f3f20c4e32334baf1920d3cb0e4 Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Mon, 8 Sep 2025 16:14:29 -0700 Subject: [PATCH 07/15] finish merge --- parquet/tests/arrow_reader/io/mod.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/parquet/tests/arrow_reader/io/mod.rs b/parquet/tests/arrow_reader/io/mod.rs index bfdb9467e20c..749d075d7188 100644 --- a/parquet/tests/arrow_reader/io/mod.rs +++ b/parquet/tests/arrow_reader/io/mod.rs @@ -287,10 +287,7 @@ impl TestRowGroups { .map(|(col_idx, col_meta)| { let column_name = col_meta.column_descr().name().to_string(); let page_locations = offset_index[rg_index][col_idx] - .page_locations() - .iter() - .map(parquet::format::PageLocation::from) - .collect(); + .page_locations().to_vec(); let dictionary_page_location = col_meta.dictionary_page_offset(); // We can find the byte range of the entire column chunk From 14046080c1311b716eec2d6dc62b66ece17d31c6 Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Wed, 10 Sep 2025 14:50:06 -0700 Subject: [PATCH 08/15] [thrift-remodel] Add thrift write support (#8237) # Which issue does this PR close? **Note: this targets a feature branch, not main** - Part of #5854. # Rationale for this change Begins adding custom thrift write support. # What changes are included in this PR? Adds traits to aid in writing of thrift and modifies thrift macros to generate writing code. # Are these changes tested? Yes, adds some roundtrip tests to validate encoded data can be decoded to the same state. # Are there any user-facing changes? No --- parquet/src/basic.rs | 375 ++++++++++++--- parquet/src/file/column_crypto_metadata.rs | 28 +- parquet/src/file/metadata/mod.rs | 6 +- parquet/src/file/metadata/thrift_gen.rs | 48 +- parquet/src/file/page_encoding_stats.rs | 7 +- parquet/src/file/page_index/index_reader.rs | 6 +- parquet/src/file/page_index/offset_index.rs | 40 +- parquet/src/parquet_macros.rs | 162 ++++++- parquet/src/parquet_thrift.rs | 482 +++++++++++++++++++- parquet/tests/arrow_reader/io/mod.rs | 3 +- 10 files changed, 1085 insertions(+), 72 deletions(-) diff --git a/parquet/src/basic.rs b/parquet/src/basic.rs index c325cf5dbf2b..5fffb56cdf74 100644 --- a/parquet/src/basic.rs +++ b/parquet/src/basic.rs @@ -20,11 +20,15 @@ //! Refer to [`parquet.thrift`](https://github.com/apache/parquet-format/blob/master/src/main/thrift/parquet.thrift) //! file to see raw definitions. 
+use std::io::Write; use std::str::FromStr; use std::{fmt, str}; pub use crate::compression::{BrotliLevel, GzipLevel, ZstdLevel}; -use crate::parquet_thrift::{FieldType, ThriftCompactInputProtocol}; +use crate::parquet_thrift::{ + ElementType, FieldType, ThriftCompactInputProtocol, ThriftCompactOutputProtocol, WriteThrift, + WriteThriftField, +}; use crate::{thrift_enum, thrift_struct, thrift_union_all_empty}; use crate::errors::{ParquetError, Result}; @@ -193,6 +197,28 @@ impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for ConvertedType { } } +impl WriteThrift for ConvertedType { + const ELEMENT_TYPE: ElementType = ElementType::I32; + + fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> { + // because we've added NONE, the variant values are off by 1, so correct that here + writer.write_i32(*self as i32 - 1) + } +} + +impl WriteThriftField for ConvertedType { + fn write_thrift_field<W: Write>( + &self, + writer: &mut ThriftCompactOutputProtocol<W>, + field_id: i16, + last_field_id: i16, + ) -> Result<i16> { + writer.write_field_begin(FieldType::I32, field_id, last_field_id)?; + self.write_thrift(writer)?; + Ok(field_id) + } +} + // ---------------------------------------------------------------------- // Mirrors thrift union `crate::format::TimeUnit` @@ -450,35 +476,137 @@ impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for LogicalType { } } +impl WriteThrift for LogicalType { + const ELEMENT_TYPE: ElementType = ElementType::Struct; + + fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> { + match self { + Self::String => { + writer.write_empty_struct(1, 0)?; + } + Self::Map => { + writer.write_empty_struct(2, 0)?; + } + Self::List => { + writer.write_empty_struct(3, 0)?; + } + Self::Enum => { + writer.write_empty_struct(4, 0)?; + } + Self::Decimal { scale, precision } => { + DecimalType { + scale: *scale, + precision: *precision, + } + .write_thrift_field(writer, 5, 0)?; + } + Self::Date => { + writer.write_empty_struct(6, 0)?; + } + Self::Time { + is_adjusted_to_u_t_c, + unit, + } => { + TimeType { + is_adjusted_to_u_t_c: *is_adjusted_to_u_t_c, + unit: *unit, + } + .write_thrift_field(writer, 7, 0)?; + } + Self::Timestamp { + is_adjusted_to_u_t_c, + unit, + } => { + TimestampType { + is_adjusted_to_u_t_c: *is_adjusted_to_u_t_c, + unit: *unit, + } + .write_thrift_field(writer, 8, 0)?; + } + Self::Integer { + bit_width, + is_signed, + } => { + IntType { + bit_width: *bit_width, + is_signed: *is_signed, + } + .write_thrift_field(writer, 10, 0)?; + } + Self::Unknown => { + writer.write_empty_struct(11, 0)?; + } + Self::Json => { + writer.write_empty_struct(12, 0)?; + } + Self::Bson => { + writer.write_empty_struct(13, 0)?; + } + Self::Uuid => { + writer.write_empty_struct(14, 0)?; + } + Self::Float16 => { + writer.write_empty_struct(15, 0)?; + } + Self::Variant { + specification_version, + } => { + VariantType { + specification_version: *specification_version, + } + .write_thrift_field(writer, 16, 0)?; + } + Self::Geometry { crs } => { + GeometryType { + crs: crs.as_ref().map(|s| s.as_str()), + } + .write_thrift_field(writer, 17, 0)?; + } + Self::Geography { crs, algorithm } => { + GeographyType { + crs: crs.as_ref().map(|s| s.as_str()), + algorithm: *algorithm, + } + .write_thrift_field(writer, 18, 0)?; + } + _ => return Err(nyi_err!("logical type")), + } + writer.write_struct_end() + } +} + +impl WriteThriftField for LogicalType { + fn write_thrift_field<W: Write>( + &self, + writer: &mut ThriftCompactOutputProtocol<W>, + field_id: i16, + last_field_id:
i16, + ) -> Result { + writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?; + self.write_thrift(writer)?; + Ok(field_id) + } +} + // ---------------------------------------------------------------------- // Mirrors thrift enum `crate::format::FieldRepetitionType` // // Cannot use macro since the name is changed +thrift_enum!( /// Representation of field types in schema. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -#[allow(non_camel_case_types)] -pub enum Repetition { - /// Field is required (can not be null) and each record has exactly 1 value. - REQUIRED, - /// Field is optional (can be null) and each record has 0 or 1 values. - OPTIONAL, - /// Field is repeated and can contain 0 or more values. - REPEATED, +enum FieldRepetitionType { + /// This field is required (can not be null) and each row has exactly 1 value. + REQUIRED = 0; + /// The field is optional (can be null) and each row has 0 or 1 values. + OPTIONAL = 1; + /// The field is repeated and can contain 0 or more values. + REPEATED = 2; } +); -impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for Repetition { - type Error = ParquetError; - fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { - let val = prot.read_i32()?; - Ok(match val { - 0 => Self::REQUIRED, - 1 => Self::OPTIONAL, - 2 => Self::REPEATED, - _ => return Err(general_err!("Unexpected FieldRepetitionType {}", val)), - }) - } -} +/// Type alias for thrift `FieldRepetitionType` +pub type Repetition = FieldRepetitionType; // ---------------------------------------------------------------------- // Mirrors thrift enum `crate::format::Encoding` @@ -646,6 +774,40 @@ impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for Compression { } } +// TODO(ets): explore replacing this with a thrift_enum!(ThriftCompression) for the serialization +// and then provide `From` impls to convert back and forth. This is necessary due to the addition +// of compression level to some variants. +impl WriteThrift for Compression { + const ELEMENT_TYPE: ElementType = ElementType::I32; + + fn write_thrift(&self, writer: &mut ThriftCompactOutputProtocol) -> Result<()> { + let id: i32 = match *self { + Self::UNCOMPRESSED => 0, + Self::SNAPPY => 1, + Self::GZIP(_) => 2, + Self::LZO => 3, + Self::BROTLI(_) => 4, + Self::LZ4 => 5, + Self::ZSTD(_) => 6, + Self::LZ4_RAW => 7, + }; + writer.write_i32(id) + } +} + +impl WriteThriftField for Compression { + fn write_thrift_field( + &self, + writer: &mut ThriftCompactOutputProtocol, + field_id: i16, + last_field_id: i16, + ) -> Result { + writer.write_field_begin(FieldType::I32, field_id, last_field_id)?; + self.write_thrift(writer)?; + Ok(field_id) + } +} + impl Compression { /// Returns the codec type of this compression setting as a string, without the compression /// level. 
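The TODO above sketches the cleaner alternative: a levels-free mirror enum generated by `thrift_enum!`, with `From` conversions that supply default compression levels on the read path. A rough illustration of that idea (not part of this patch; the `ThriftCompression` name and the reliance on `Default` for the level types are assumptions):

// Sketch only: a serialization-side mirror of `Compression` without level payloads.
thrift_enum!(
enum ThriftCompression {
  UNCOMPRESSED = 0;
  SNAPPY = 1;
  GZIP = 2;
  LZO = 3;
  BROTLI = 4;
  LZ4 = 5;
  ZSTD = 6;
  LZ4_RAW = 7;
}
);

impl From<Compression> for ThriftCompression {
    fn from(value: Compression) -> Self {
        // levels are a write-time setting and are not stored in the file
        match value {
            Compression::UNCOMPRESSED => Self::UNCOMPRESSED,
            Compression::SNAPPY => Self::SNAPPY,
            Compression::GZIP(_) => Self::GZIP,
            Compression::LZO => Self::LZO,
            Compression::BROTLI(_) => Self::BROTLI,
            Compression::LZ4 => Self::LZ4,
            Compression::ZSTD(_) => Self::ZSTD,
            Compression::LZ4_RAW => Self::LZ4_RAW,
        }
    }
}

impl From<ThriftCompression> for Compression {
    fn from(value: ThriftCompression) -> Self {
        // on read, fall back to default levels for the codecs that carry one
        match value {
            ThriftCompression::UNCOMPRESSED => Compression::UNCOMPRESSED,
            ThriftCompression::SNAPPY => Compression::SNAPPY,
            ThriftCompression::GZIP => Compression::GZIP(Default::default()),
            ThriftCompression::LZO => Compression::LZO,
            ThriftCompression::BROTLI => Compression::BROTLI(Default::default()),
            ThriftCompression::LZ4 => Compression::LZ4,
            ThriftCompression::ZSTD => Compression::ZSTD(Default::default()),
            ThriftCompression::LZ4_RAW => Compression::LZ4_RAW,
        }
    }
}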
@@ -993,16 +1155,26 @@ impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for ColumnOrder { } } -// ---------------------------------------------------------------------- -// Display handlers +impl WriteThrift for ColumnOrder { + const ELEMENT_TYPE: ElementType = ElementType::Struct; -impl fmt::Display for ConvertedType { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{self:?}") + fn write_thrift(&self, writer: &mut ThriftCompactOutputProtocol) -> Result<()> { + match *self { + Self::TYPE_DEFINED_ORDER(_) => { + writer.write_field_begin(FieldType::Struct, 1, 0)?; + writer.write_struct_end()?; + } + _ => return Err(general_err!("Attempt to write undefined ColumnOrder")), + } + // write end of struct for this union + writer.write_struct_end() } } -impl fmt::Display for Repetition { +// ---------------------------------------------------------------------- +// Display handlers + +impl fmt::Display for ConvertedType { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{self:?}") } @@ -1259,37 +1431,6 @@ impl From> for ConvertedType { } } -// ---------------------------------------------------------------------- -// crate::format::FieldRepetitionType <=> Repetition conversion - -impl TryFrom for Repetition { - type Error = ParquetError; - - fn try_from(value: crate::format::FieldRepetitionType) -> Result { - Ok(match value { - crate::format::FieldRepetitionType::REQUIRED => Repetition::REQUIRED, - crate::format::FieldRepetitionType::OPTIONAL => Repetition::OPTIONAL, - crate::format::FieldRepetitionType::REPEATED => Repetition::REPEATED, - _ => { - return Err(general_err!( - "unexpected parquet repetition type: {}", - value.0 - )) - } - }) - } -} - -impl From for crate::format::FieldRepetitionType { - fn from(value: Repetition) -> Self { - match value { - Repetition::REQUIRED => crate::format::FieldRepetitionType::REQUIRED, - Repetition::OPTIONAL => crate::format::FieldRepetitionType::OPTIONAL, - Repetition::REPEATED => crate::format::FieldRepetitionType::REPEATED, - } - } -} - // ---------------------------------------------------------------------- // crate::format::CompressionCodec <=> Compression conversion @@ -1442,6 +1583,7 @@ impl str::FromStr for LogicalType { #[allow(deprecated)] // allow BIT_PACKED encoding for the whole test module mod tests { use super::*; + use crate::parquet_thrift::tests::test_roundtrip; #[test] fn test_display_type() { @@ -1549,6 +1691,32 @@ mod tests { ); } + #[test] + fn test_converted_type_roundtrip() { + test_roundtrip(ConvertedType::UTF8); + test_roundtrip(ConvertedType::MAP); + test_roundtrip(ConvertedType::MAP_KEY_VALUE); + test_roundtrip(ConvertedType::LIST); + test_roundtrip(ConvertedType::ENUM); + test_roundtrip(ConvertedType::DECIMAL); + test_roundtrip(ConvertedType::DATE); + test_roundtrip(ConvertedType::TIME_MILLIS); + test_roundtrip(ConvertedType::TIME_MICROS); + test_roundtrip(ConvertedType::TIMESTAMP_MILLIS); + test_roundtrip(ConvertedType::TIMESTAMP_MICROS); + test_roundtrip(ConvertedType::UINT_8); + test_roundtrip(ConvertedType::UINT_16); + test_roundtrip(ConvertedType::UINT_32); + test_roundtrip(ConvertedType::UINT_64); + test_roundtrip(ConvertedType::INT_8); + test_roundtrip(ConvertedType::INT_16); + test_roundtrip(ConvertedType::INT_32); + test_roundtrip(ConvertedType::INT_64); + test_roundtrip(ConvertedType::JSON); + test_roundtrip(ConvertedType::BSON); + test_roundtrip(ConvertedType::INTERVAL); + } + #[test] fn test_display_converted_type() { assert_eq!(ConvertedType::NONE.to_string(), "NONE"); @@ 
-2106,6 +2274,89 @@ mod tests { ); } + #[test] + fn test_logical_type_roundtrip() { + test_roundtrip(LogicalType::String); + test_roundtrip(LogicalType::Map); + test_roundtrip(LogicalType::List); + test_roundtrip(LogicalType::Enum); + test_roundtrip(LogicalType::Decimal { + scale: 0, + precision: 20, + }); + test_roundtrip(LogicalType::Date); + test_roundtrip(LogicalType::Time { + is_adjusted_to_u_t_c: true, + unit: TimeUnit::MICROS, + }); + test_roundtrip(LogicalType::Time { + is_adjusted_to_u_t_c: false, + unit: TimeUnit::MILLIS, + }); + test_roundtrip(LogicalType::Time { + is_adjusted_to_u_t_c: false, + unit: TimeUnit::NANOS, + }); + test_roundtrip(LogicalType::Timestamp { + is_adjusted_to_u_t_c: false, + unit: TimeUnit::MICROS, + }); + test_roundtrip(LogicalType::Timestamp { + is_adjusted_to_u_t_c: true, + unit: TimeUnit::MILLIS, + }); + test_roundtrip(LogicalType::Timestamp { + is_adjusted_to_u_t_c: true, + unit: TimeUnit::NANOS, + }); + test_roundtrip(LogicalType::Integer { + bit_width: 8, + is_signed: true, + }); + test_roundtrip(LogicalType::Integer { + bit_width: 16, + is_signed: false, + }); + test_roundtrip(LogicalType::Integer { + bit_width: 32, + is_signed: true, + }); + test_roundtrip(LogicalType::Integer { + bit_width: 64, + is_signed: false, + }); + test_roundtrip(LogicalType::Json); + test_roundtrip(LogicalType::Bson); + test_roundtrip(LogicalType::Uuid); + test_roundtrip(LogicalType::Float16); + test_roundtrip(LogicalType::Variant { + specification_version: Some(1), + }); + test_roundtrip(LogicalType::Variant { + specification_version: None, + }); + test_roundtrip(LogicalType::Geometry { + crs: Some("foo".to_owned()), + }); + test_roundtrip(LogicalType::Geometry { crs: None }); + test_roundtrip(LogicalType::Geography { + crs: Some("foo".to_owned()), + algorithm: Some(EdgeInterpolationAlgorithm::ANDOYER), + }); + test_roundtrip(LogicalType::Geography { + crs: None, + algorithm: Some(EdgeInterpolationAlgorithm::KARNEY), + }); + test_roundtrip(LogicalType::Geography { + crs: Some("foo".to_owned()), + algorithm: None, + }); + test_roundtrip(LogicalType::Geography { + crs: None, + algorithm: None, + }); + } + #[test] fn test_display_repetition() { assert_eq!(Repetition::REQUIRED.to_string(), "REQUIRED"); @@ -2411,6 +2662,12 @@ mod tests { assert_eq!(ColumnOrder::UNDEFINED.to_string(), "UNDEFINED"); } + #[test] + fn test_column_order_roundtrip() { + // SortOrder::SIGNED is the default on read. + test_roundtrip(ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED)) + } + #[test] fn test_column_order_get_logical_type_sort_order() { // Helper to check the order in a list of values. diff --git a/parquet/src/file/column_crypto_metadata.rs b/parquet/src/file/column_crypto_metadata.rs index 95cbc65cf716..5bba07357947 100644 --- a/parquet/src/file/column_crypto_metadata.rs +++ b/parquet/src/file/column_crypto_metadata.rs @@ -17,13 +17,18 @@ //! 
Column chunk encryption metadata +use std::io::Write; + use crate::errors::{ParquetError, Result}; use crate::format::{ ColumnCryptoMetaData as TColumnCryptoMetaData, EncryptionWithColumnKey as TEncryptionWithColumnKey, EncryptionWithFooterKey as TEncryptionWithFooterKey, }; -use crate::parquet_thrift::{FieldType, ThriftCompactInputProtocol}; +use crate::parquet_thrift::{ + ElementType, FieldType, ThriftCompactInputProtocol, ThriftCompactOutputProtocol, WriteThrift, + WriteThriftField, +}; use crate::{thrift_struct, thrift_union}; // define this and ColumnCryptoMetadata here so they're only defined when @@ -84,6 +89,7 @@ pub fn to_thrift(column_crypto_metadata: &ColumnCryptoMetaData) -> TColumnCrypto #[cfg(test)] mod tests { use super::*; + use crate::parquet_thrift::tests::test_roundtrip; #[test] fn test_encryption_with_footer_key_from_thrift() { @@ -101,4 +107,24 @@ mod tests { assert_eq!(try_from_thrift(&to_thrift(&metadata)).unwrap(), metadata); } + + #[test] + fn test_column_crypto_roundtrip() { + test_roundtrip(ColumnCryptoMetaData::ENCRYPTION_WITH_FOOTER_KEY); + + let path_in_schema = vec!["foo".to_owned(), "bar".to_owned(), "really".to_owned()]; + let key_metadata = vec![1u8; 32]; + test_roundtrip(ColumnCryptoMetaData::ENCRYPTION_WITH_COLUMN_KEY( + EncryptionWithColumnKey { + path_in_schema: path_in_schema.clone(), + key_metadata: None, + }, + )); + test_roundtrip(ColumnCryptoMetaData::ENCRYPTION_WITH_COLUMN_KEY( + EncryptionWithColumnKey { + path_in_schema, + key_metadata: Some(key_metadata), + }, + )); + } } diff --git a/parquet/src/file/metadata/mod.rs b/parquet/src/file/metadata/mod.rs index 69cdf8f10714..8b06fe676308 100644 --- a/parquet/src/file/metadata/mod.rs +++ b/parquet/src/file/metadata/mod.rs @@ -125,7 +125,10 @@ use crate::{ }; use crate::{ basic::{ColumnOrder, Compression, Encoding, Type}, - parquet_thrift::{FieldType, ThriftCompactInputProtocol}, + parquet_thrift::{ + ElementType, FieldType, ThriftCompactInputProtocol, ThriftCompactOutputProtocol, + WriteThrift, WriteThriftField, + }, }; use crate::{ data_type::private::ParquetValueType, file::page_index::offset_index::OffsetIndexMetaData, @@ -135,6 +138,7 @@ use crate::{ thrift_struct, }; pub use reader::{FooterTail, PageIndexPolicy, ParquetMetaDataReader}; +use std::io::Write; use std::ops::Range; use std::sync::Arc; pub use writer::ParquetMetaDataWriter; diff --git a/parquet/src/file/metadata/thrift_gen.rs b/parquet/src/file/metadata/thrift_gen.rs index 3888d247df1c..f15a5a6b16d8 100644 --- a/parquet/src/file/metadata/thrift_gen.rs +++ b/parquet/src/file/metadata/thrift_gen.rs @@ -17,6 +17,7 @@ // a collection of generated structs used to parse thrift metadata +use std::io::Write; use std::sync::Arc; #[cfg(feature = "encryption")] @@ -33,7 +34,10 @@ use crate::{ page_encoding_stats::PageEncodingStats, statistics::ValueStatistics, }, - parquet_thrift::{FieldType, ThriftCompactInputProtocol}, + parquet_thrift::{ + ElementType, FieldType, ThriftCompactInputProtocol, ThriftCompactOutputProtocol, + WriteThrift, WriteThriftField, + }, schema::types::{parquet_schema_from_array, ColumnDescriptor, SchemaDescriptor}, thrift_struct, util::bit_util::FromBytes, @@ -507,3 +511,45 @@ impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for ParquetMetaData { Ok(ParquetMetaData::new(fmd, row_groups)) } } + +#[cfg(test)] +mod tests { + use crate::file::metadata::thrift_gen::BoundingBox; + use crate::parquet_thrift::tests::test_roundtrip; + + #[test] + fn test_bounding_box_roundtrip() { + test_roundtrip(BoundingBox { + xmin: 
0.1.into(), + xmax: 10.3.into(), + ymin: 0.001.into(), + ymax: 128.5.into(), + zmin: None, + zmax: None, + mmin: None, + mmax: None, + }); + + test_roundtrip(BoundingBox { + xmin: 0.1.into(), + xmax: 10.3.into(), + ymin: 0.001.into(), + ymax: 128.5.into(), + zmin: Some(11.0.into()), + zmax: Some(1300.0.into()), + mmin: None, + mmax: None, + }); + + test_roundtrip(BoundingBox { + xmin: 0.1.into(), + xmax: 10.3.into(), + ymin: 0.001.into(), + ymax: 128.5.into(), + zmin: Some(11.0.into()), + zmax: Some(1300.0.into()), + mmin: Some(3.7.into()), + mmax: Some(42.0.into()), + }); + } +} diff --git a/parquet/src/file/page_encoding_stats.rs b/parquet/src/file/page_encoding_stats.rs index 281954d939dd..2d433dc9b3f1 100644 --- a/parquet/src/file/page_encoding_stats.rs +++ b/parquet/src/file/page_encoding_stats.rs @@ -17,9 +17,14 @@ //! Per-page encoding information. +use std::io::Write; + use crate::basic::{Encoding, PageType}; use crate::errors::{ParquetError, Result}; -use crate::parquet_thrift::{FieldType, ThriftCompactInputProtocol}; +use crate::parquet_thrift::{ + ElementType, FieldType, ThriftCompactInputProtocol, ThriftCompactOutputProtocol, WriteThrift, + WriteThriftField, +}; use crate::thrift_struct; // TODO: This should probably all be moved to thrift_gen diff --git a/parquet/src/file/page_index/index_reader.rs b/parquet/src/file/page_index/index_reader.rs index 99e5963b290e..e9cf119224c9 100644 --- a/parquet/src/file/page_index/index_reader.rs +++ b/parquet/src/file/page_index/index_reader.rs @@ -26,8 +26,12 @@ use crate::file::page_index::column_index::{ }; use crate::file::page_index::offset_index::OffsetIndexMetaData; use crate::file::reader::ChunkReader; -use crate::parquet_thrift::{FieldType, ThriftCompactInputProtocol}; +use crate::parquet_thrift::{ + ElementType, FieldType, ThriftCompactInputProtocol, ThriftCompactOutputProtocol, WriteThrift, + WriteThriftField, +}; use crate::thrift_struct; +use std::io::Write; use std::ops::Range; /// Computes the covering range of two optional ranges diff --git a/parquet/src/file/page_index/offset_index.rs b/parquet/src/file/page_index/offset_index.rs index 6cb7539cb573..ac2620af09d8 100644 --- a/parquet/src/file/page_index/offset_index.rs +++ b/parquet/src/file/page_index/offset_index.rs @@ -19,7 +19,12 @@ //! //! [`OffsetIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md -use crate::parquet_thrift::{FieldType, ThriftCompactInputProtocol}; +use std::io::Write; + +use crate::parquet_thrift::{ + ElementType, FieldType, ThriftCompactInputProtocol, ThriftCompactOutputProtocol, WriteThrift, + WriteThriftField, +}; use crate::{ errors::{ParquetError, Result}, thrift_struct, @@ -193,3 +198,36 @@ fn read_page_location<'a>(prot: &mut ThriftCompactInputProtocol<'a>) -> Result

(&self, writer: &mut ThriftCompactOutputProtocol) -> Result<()> { + writer.write_i32(*self as i32) + } + } + + impl WriteThriftField for $identifier { + fn write_thrift_field(&self, writer: &mut ThriftCompactOutputProtocol, field_id: i16, last_field_id: i16) -> Result { + writer.write_field_begin(FieldType::I32, field_id, last_field_id)?; + self.write_thrift(writer)?; + Ok(field_id) + } + } + // TODO: remove when we finally get rid of the format module impl TryFrom for $identifier { type Error = ParquetError; @@ -119,6 +139,26 @@ macro_rules! thrift_union_all_empty { } } + impl WriteThrift for $identifier { + const ELEMENT_TYPE: ElementType = ElementType::Struct; + + fn write_thrift(&self, writer: &mut ThriftCompactOutputProtocol) -> Result<()> { + match *self { + $(Self::$field_name => writer.write_empty_struct($field_id, 0)?,)* + }; + // write end of struct for this union + writer.write_struct_end() + } + } + + impl WriteThriftField for $identifier { + fn write_thrift_field(&self, writer: &mut ThriftCompactOutputProtocol, field_id: i16, last_field_id: i16) -> Result { + writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?; + self.write_thrift(writer)?; + Ok(field_id) + } + } + // TODO: remove when we finally get rid of the format module impl From for $identifier { fn from(value: crate::format::$identifier) -> Self { @@ -183,9 +223,51 @@ macro_rules! thrift_union { Ok(ret) } } + + impl WriteThrift for $identifier { + const ELEMENT_TYPE: ElementType = ElementType::Struct; + + fn write_thrift(&self, writer: &mut ThriftCompactOutputProtocol) -> Result<()> { + match self { + $($crate::__thrift_write_variant_lhs!($field_name $($field_type)?, variant_val) => + $crate::__thrift_write_variant_rhs!($field_id $($field_type)?, writer, variant_val),)* + }; + writer.write_struct_end() + } + } + + impl WriteThriftField for $identifier { + fn write_thrift_field(&self, writer: &mut ThriftCompactOutputProtocol, field_id: i16, last_field_id: i16) -> Result { + writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?; + self.write_thrift(writer)?; + Ok(field_id) + } + } } } +#[doc(hidden)] +#[macro_export] +macro_rules! __thrift_write_variant_lhs { + ($field_name:ident $field_type:ident, $val:tt) => { + Self::$field_name($val) + }; + ($field_name:ident, $val:tt) => { + Self::$field_name + }; +} + +#[doc(hidden)] +#[macro_export] +macro_rules! __thrift_write_variant_rhs { + ($field_id:literal $field_type:ident, $writer:tt, $val:ident) => { + $val.write_thrift_field($writer, $field_id, 0)? + }; + ($field_id:literal, $writer:tt, $val:tt) => { + $writer.write_empty_struct($field_id, 0)? + }; +} + /// macro to generate rust structs from a thrift struct definition /// unlike enum and union, this macro will allow for visibility specifier /// can also take optional lifetime for struct and elements within it (need e.g.) @@ -228,14 +310,86 @@ macro_rules! thrift_struct { }) } } + + impl $(<$lt>)? WriteThrift for $identifier $(<$lt>)? { + const ELEMENT_TYPE: ElementType = ElementType::Struct; + + #[allow(unused_assignments)] + fn write_thrift(&self, writer: &mut ThriftCompactOutputProtocol) -> Result<()> { + let mut last_field_id = 0i16; + $($crate::__thrift_write_required_or_optional_field!($required_or_optional $field_name, $field_id, $field_type, self, writer, last_field_id);)* + writer.write_struct_end() + } + } + + impl $(<$lt>)? WriteThriftField for $identifier $(<$lt>)? 
{ + fn write_thrift_field<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>, field_id: i16, last_field_id: i16) -> Result<i16> { + writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?; + self.write_thrift(writer)?; + Ok(field_id) + } + } } } -/// macro to use when decoding struct fields +#[doc(hidden)] +#[macro_export] +macro_rules! __thrift_write_required_or_optional_field { + (required $field_name:ident, $field_id:literal, $field_type:ident, $self:tt, $writer:tt, $last_id:tt) => { + $crate::__thrift_write_required_field!( + $field_type, + $field_name, + $field_id, + $self, + $writer, + $last_id + ) + }; + (optional $field_name:ident, $field_id:literal, $field_type:ident, $self:tt, $writer:tt, $last_id:tt) => { + $crate::__thrift_write_optional_field!( + $field_type, + $field_name, + $field_id, + $self, + $writer, + $last_id + ) + }; +} + +#[doc(hidden)] #[macro_export] -macro_rules! thrift_read_field { - ($field_name:ident, $prot:tt, $field_type:ident) => { - $field_name = Some($crate::__thrift_read_field!($prot, $field_type)); +macro_rules! __thrift_write_required_field { + (binary, $field_name:ident, $field_id:literal, $self:ident, $writer:ident, $last_id:ident) => { + $writer.write_field_begin(FieldType::Binary, $field_id, $last_id)?; + $writer.write_bytes($self.$field_name)?; + $last_id = $field_id; + }; + ($field_type:ident, $field_name:ident, $field_id:literal, $self:ident, $writer:ident, $last_id:ident) => { + $last_id = $self + .$field_name + .write_thrift_field($writer, $field_id, $last_id)?; + }; +} + +#[doc(hidden)] +#[macro_export] +macro_rules! __thrift_write_optional_field { + (binary, $field_name:ident, $field_id:literal, $self:ident, $writer:tt, $last_id:tt) => { + if $self.$field_name.is_some() { + $writer.write_field_begin(FieldType::Binary, $field_id, $last_id)?; + $writer.write_bytes($self.$field_name.as_ref().unwrap())?; + $last_id = $field_id; + } + }; + ($field_type:ident, $field_name:ident, $field_id:literal, $self:ident, $writer:tt, $last_id:tt) => { + if $self.$field_name.is_some() { + $last_id = $self + .$field_name + .as_ref() + .unwrap() + .write_thrift_field($writer, $field_id, $last_id)?; + } + }; }
diff --git a/parquet/src/parquet_thrift.rs b/parquet/src/parquet_thrift.rs index 2dff498372f0..9b83c0a01b8d 100644 --- a/parquet/src/parquet_thrift.rs +++ b/parquet/src/parquet_thrift.rs @@ -20,7 +20,7 @@ // to not allocate byte arrays or strings. #![allow(dead_code)] -use std::cmp::Ordering; +use std::{cmp::Ordering, io::Write}; use crate::errors::{ParquetError, Result}; @@ -31,6 +31,12 @@ use crate::errors::{ParquetError, Result}; #[derive(Debug, Clone, Copy, PartialEq)] pub struct OrderedF64(f64); +impl From<f64> for OrderedF64 { + fn from(value: f64) -> Self { + Self(value) + } +} + impl From<OrderedF64> for f64 { fn from(value: OrderedF64) -> Self { value.0 @@ -539,3 +545,477 @@ where Ok(res) } } + +///////////////////////// +// thrift compact output + +/// Low-level object used to serialize structs to the Thrift [compact output] protocol. +/// +/// This struct serves as a wrapper around a [`Write`] object, to which thrift encoded data +/// will be written. The implementation provides functions to write Thrift primitive types, as well +/// as functions used in the encoding of lists and structs. This should rarely be used directly, +/// but is instead intended for use by implementers of [`WriteThrift`] and [`WriteThriftField`]. +/// +/// [compact output]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md
+pub(crate) struct ThriftCompactOutputProtocol<W: Write> { + writer: W, +} + +impl<W: Write> ThriftCompactOutputProtocol<W> { + /// Create a new `ThriftCompactOutputProtocol` wrapping the byte sink `writer`. + pub(crate) fn new(writer: W) -> Self { + Self { writer } + } + + /// Return a reference to the underlying `Write`. + pub(crate) fn inner(&self) -> &W { + &self.writer + } + + /// Write a single byte to the output stream. + fn write_byte(&mut self, b: u8) -> Result<()> { + self.writer.write_all(&[b])?; + Ok(()) + } + + /// Write the given `u64` as a ULEB128 encoded varint. + fn write_vlq(&mut self, val: u64) -> Result<()> { + let mut v = val; + while v > 0x7f { + self.write_byte(v as u8 | 0x80)?; + v >>= 7; + } + self.write_byte(v as u8) + } + + /// Write the given `i64` as a zig-zag encoded varint (0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...). + fn write_zig_zag(&mut self, val: i64) -> Result<()> { + let s = (val < 0) as i64; + self.write_vlq((((val ^ -s) << 1) + s) as u64) + } + + /// Used to mark the start of a Thrift struct field of type `field_type`. `last_field_id` + /// is used to compute a delta to the given `field_id` per the compact protocol [spec]. + /// + /// [spec]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#struct-encoding + pub(crate) fn write_field_begin( + &mut self, + field_type: FieldType, + field_id: i16, + last_field_id: i16, + ) -> Result<()> { + let delta = field_id.wrapping_sub(last_field_id); + if delta > 0 && delta <= 0xf { + self.write_byte((delta as u8) << 4 | field_type as u8) + } else { + self.write_byte(field_type as u8)?; + self.write_i16(field_id) + } + } + + /// Used to indicate the start of a list of `element_type` elements. + pub(crate) fn write_list_begin(&mut self, element_type: ElementType, len: usize) -> Result<()> { + if len < 15 { + self.write_byte((len as u8) << 4 | element_type as u8) + } else { + self.write_byte(0xf0u8 | element_type as u8)?; + self.write_vlq(len as _) + } + } + + /// Used to mark the end of a struct. This must be called after all fields of the struct have + /// been written. + pub(crate) fn write_struct_end(&mut self) -> Result<()> { + self.write_byte(0) + } + + /// Serialize a slice of `u8`s. This will encode a length, and then write the bytes without + /// further encoding. + pub(crate) fn write_bytes(&mut self, val: &[u8]) -> Result<()> { + self.write_vlq(val.len() as u64)?; + self.writer.write_all(val)?; + Ok(()) + } + + /// Short-cut method used to encode structs that have no fields (often used in Thrift unions). + /// This simply encodes the field id and then immediately writes the end-of-struct marker. + pub(crate) fn write_empty_struct(&mut self, field_id: i16, last_field_id: i16) -> Result<i16> { + self.write_field_begin(FieldType::Struct, field_id, last_field_id)?; + self.write_struct_end()?; + Ok(last_field_id) + } + + /// Write a boolean value. + pub(crate) fn write_bool(&mut self, val: bool) -> Result<()> { + match val { + true => self.write_byte(1), + false => self.write_byte(2), + } + } + + /// Write an `i8` value as a single raw byte (bytes are not zig-zag encoded). + pub(crate) fn write_i8(&mut self, val: i8) -> Result<()> { + self.write_byte(val as u8) + } + + /// Write a zig-zag encoded `i16` value. + pub(crate) fn write_i16(&mut self, val: i16) -> Result<()> { + self.write_zig_zag(val as _) + } + + /// Write a zig-zag encoded `i32` value. + pub(crate) fn write_i32(&mut self, val: i32) -> Result<()> { + self.write_zig_zag(val as _) + } + + /// Write a zig-zag encoded `i64` value. + pub(crate) fn write_i64(&mut self, val: i64) -> Result<()> { + self.write_zig_zag(val as _) + } + + /// Write a double value. + pub(crate) fn write_double(&mut self, val: f64) -> Result<()> { + self.writer.write_all(&val.to_le_bytes())?; + Ok(()) + } +}
+ +/// Trait implemented by objects that are to be serialized to a Thrift [compact output] protocol +/// stream. Implementations are also provided for primitive Thrift types. +/// +/// [compact output]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md +pub(crate) trait WriteThrift { + /// The [`ElementType`] to use when a list of this object is written. + const ELEMENT_TYPE: ElementType; + + /// Serialize this object to the given `writer`. + fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()>; +} + +/// Implementation for a vector of thrift serializable objects that implement [`WriteThrift`]. +/// This will write the necessary list header and then serialize the elements one-at-a-time. +impl<T> WriteThrift for Vec<T> +where + T: WriteThrift, +{ + const ELEMENT_TYPE: ElementType = ElementType::List; + + fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> { + writer.write_list_begin(T::ELEMENT_TYPE, self.len())?; + for item in self { + item.write_thrift(writer)?; + } + Ok(()) + } +} + +impl WriteThrift for bool { + const ELEMENT_TYPE: ElementType = ElementType::Bool; + + fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> { + writer.write_bool(*self) + } +} + +impl WriteThrift for i8 { + const ELEMENT_TYPE: ElementType = ElementType::Byte; + + fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> { + writer.write_i8(*self) + } +} + +impl WriteThrift for i16 { + const ELEMENT_TYPE: ElementType = ElementType::I16; + + fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> { + writer.write_i16(*self) + } +} + +impl WriteThrift for i32 { + const ELEMENT_TYPE: ElementType = ElementType::I32; + + fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> { + writer.write_i32(*self) + } +} + +impl WriteThrift for i64 { + const ELEMENT_TYPE: ElementType = ElementType::I64; + + fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> { + writer.write_i64(*self) + } +} + +impl WriteThrift for OrderedF64 { + const ELEMENT_TYPE: ElementType = ElementType::Double; + + fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> { + writer.write_double(self.0) + } +} + +impl WriteThrift for &[u8] { + const ELEMENT_TYPE: ElementType = ElementType::Binary; + + fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> { + writer.write_bytes(self) + } +} + +impl WriteThrift for &str { + const ELEMENT_TYPE: ElementType = ElementType::Binary; + + fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> { + writer.write_bytes(self.as_bytes()) + } +} + +impl WriteThrift for String { + const ELEMENT_TYPE: ElementType = ElementType::Binary; + + fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> { + writer.write_bytes(self.as_bytes()) + } +}
+ +/// Trait implemented by objects that are fields of Thrift structs. +/// +/// For example, given the Thrift struct definition +/// ```ignore +/// struct MyStruct { +/// 1: required i32 field1 +/// 2: optional bool field2 +/// 3: optional OtherStruct field3 +/// } +/// ``` +/// +/// which becomes in Rust +/// ```no_run +/// # struct OtherStruct {} +/// struct MyStruct { +/// field1: i32, +/// field2: Option<bool>, +/// field3: Option<OtherStruct>, +/// } +/// ``` +/// the impl of `WriteThrift` for `MyStruct` will use the `WriteThriftField` impls for `i32`, +/// `bool`, and `OtherStruct`. +/// +/// ```ignore +/// impl WriteThrift for MyStruct { +/// fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> { +/// let mut last_field_id = 0i16; +/// last_field_id = self.field1.write_thrift_field(writer, 1, last_field_id)?; +/// if self.field2.is_some() { +/// // if field2 is `None` then this assignment won't happen and last_field_id will remain +/// // `1` when writing `field3` +/// last_field_id = self.field2.write_thrift_field(writer, 2, last_field_id)?; +/// } +/// if self.field3.is_some() { +/// // no need to assign last_field_id since this is the final field. +/// self.field3.write_thrift_field(writer, 3, last_field_id)?; +/// } +/// writer.write_struct_end() +/// } +/// } +/// ``` +/// +pub(crate) trait WriteThriftField { + /// Used to write struct fields (which may be primitive or IDL defined types). This will + /// write the field marker for the given `field_id`, using `last_field_id` to compute the + /// field delta used by the Thrift [compact protocol]. On success this will return `field_id` + /// to be used in chaining. + /// + /// [compact protocol]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#struct-encoding + fn write_thrift_field<W: Write>( + &self, + writer: &mut ThriftCompactOutputProtocol<W>, + field_id: i16, + last_field_id: i16, + ) -> Result<i16>; +} + +impl WriteThriftField for bool { + fn write_thrift_field<W: Write>( + &self, + writer: &mut ThriftCompactOutputProtocol<W>, + field_id: i16, + last_field_id: i16, + ) -> Result<i16> { + // boolean only writes the field header + match *self { + true => writer.write_field_begin(FieldType::BooleanTrue, field_id, last_field_id)?, + false => writer.write_field_begin(FieldType::BooleanFalse, field_id, last_field_id)?, + } + Ok(field_id) + } +} + +impl WriteThriftField for i8 { + fn write_thrift_field<W: Write>( + &self, + writer: &mut ThriftCompactOutputProtocol<W>, + field_id: i16, + last_field_id: i16, + ) -> Result<i16> { + writer.write_field_begin(FieldType::Byte, field_id, last_field_id)?; + writer.write_i8(*self)?; + Ok(field_id) + } +} + +impl WriteThriftField for i16 { + fn write_thrift_field<W: Write>( + &self, + writer: &mut ThriftCompactOutputProtocol<W>, + field_id: i16, + last_field_id: i16, + ) -> Result<i16> { + writer.write_field_begin(FieldType::I16, field_id, last_field_id)?; + writer.write_i16(*self)?; + Ok(field_id) + } +} + +impl WriteThriftField for i32 { + fn write_thrift_field<W: Write>( + &self, + writer: &mut ThriftCompactOutputProtocol<W>, + field_id: i16, + last_field_id: i16, + ) -> Result<i16> { + writer.write_field_begin(FieldType::I32, field_id, last_field_id)?; + writer.write_i32(*self)?; + Ok(field_id) + } +} + +impl WriteThriftField for i64 { + fn write_thrift_field<W: Write>( + &self, + writer: &mut ThriftCompactOutputProtocol<W>, + field_id: i16, + last_field_id: i16, + ) -> Result<i16> { + writer.write_field_begin(FieldType::I64, field_id, last_field_id)?; + writer.write_i64(*self)?; + Ok(field_id) + } +} + +impl WriteThriftField for OrderedF64 { + fn write_thrift_field<W: Write>( + &self, + writer: &mut ThriftCompactOutputProtocol<W>, + field_id: i16, + last_field_id: i16, + ) -> Result<i16> { + writer.write_field_begin(FieldType::Double, field_id, last_field_id)?; + writer.write_double(self.0)?; + Ok(field_id) + } +} + +impl WriteThriftField for &[u8] { + fn write_thrift_field<W: Write>( + &self, + writer: &mut ThriftCompactOutputProtocol<W>, + field_id: i16, + last_field_id: i16, + ) -> Result<i16> { + writer.write_field_begin(FieldType::Binary, field_id, last_field_id)?; + writer.write_bytes(self)?; + Ok(field_id) + } +} + +impl WriteThriftField for &str { + fn write_thrift_field<W: Write>( + &self, + writer: &mut ThriftCompactOutputProtocol<W>, + field_id: i16, + last_field_id: i16, + ) -> Result<i16> { + writer.write_field_begin(FieldType::Binary, field_id, last_field_id)?; + writer.write_bytes(self.as_bytes())?; + Ok(field_id) + } +} + +impl WriteThriftField for String { + fn write_thrift_field<W: Write>( + &self, + writer: &mut ThriftCompactOutputProtocol<W>, + field_id: i16, + last_field_id: i16, + ) -> Result<i16> { + writer.write_field_begin(FieldType::Binary, field_id, last_field_id)?; + writer.write_bytes(self.as_bytes())?; + Ok(field_id) + } +} + +impl<T> WriteThriftField for Vec<T> +where + T: WriteThrift, +{ + fn write_thrift_field<W: Write>( + &self, + writer: &mut ThriftCompactOutputProtocol<W>, + field_id: i16, + last_field_id: i16, + ) -> Result<i16> { + writer.write_field_begin(FieldType::List, field_id, last_field_id)?; + self.write_thrift(writer)?; + Ok(field_id) + } +}
+ +#[cfg(test)] +pub(crate) mod tests { + use crate::basic::{TimeUnit, Type}; + + use super::*; + use std::fmt::Debug; + + pub(crate) fn test_roundtrip<T>(val: T) + where + T: for<'a> TryFrom<&'a mut ThriftCompactInputProtocol<'a>> + WriteThrift + PartialEq + Debug, + for<'a> <T as TryFrom<&'a mut ThriftCompactInputProtocol<'a>>>::Error: Debug, + { + let buf = Vec::<u8>::new(); + let mut writer = ThriftCompactOutputProtocol::new(buf); + val.write_thrift(&mut writer).unwrap(); + + //println!("serialized: {:x?}", writer.inner()); + + let mut prot = ThriftCompactInputProtocol::new(writer.inner()); + let read_val = T::try_from(&mut prot).unwrap(); + assert_eq!(val, read_val); + } + + #[test] + fn test_enum_roundtrip() { + test_roundtrip(Type::BOOLEAN); + test_roundtrip(Type::INT32); + test_roundtrip(Type::INT64); + test_roundtrip(Type::INT96); + test_roundtrip(Type::FLOAT); + test_roundtrip(Type::DOUBLE); + test_roundtrip(Type::BYTE_ARRAY); + test_roundtrip(Type::FIXED_LEN_BYTE_ARRAY); + } + + #[test] + fn test_union_all_empty_roundtrip() { + test_roundtrip(TimeUnit::MILLIS); + test_roundtrip(TimeUnit::MICROS); + test_roundtrip(TimeUnit::NANOS); + } +}
diff --git a/parquet/tests/arrow_reader/io/mod.rs b/parquet/tests/arrow_reader/io/mod.rs index 749d075d7188..051a61de5075 100644 --- a/parquet/tests/arrow_reader/io/mod.rs +++ b/parquet/tests/arrow_reader/io/mod.rs @@ -286,8 +286,7 @@ impl TestRowGroups { .enumerate() .map(|(col_idx, col_meta)| { let column_name = col_meta.column_descr().name().to_string(); - let page_locations = offset_index[rg_index][col_idx] - .page_locations().to_vec(); + let page_locations = offset_index[rg_index][col_idx].page_locations(); let dictionary_page_location = col_meta.dictionary_page_offset(); // We can find the byte range of the entire column chunk
From 8f5be54a2ccaa66e052d29085b387bf7b85ef10b Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Fri, 12 Sep 2025 13:43:01 -0700 Subject: [PATCH 09/15] [thrift-remodel] Begin replacing file metadata reader and convert footer decryption code (#8313) # Which issue does this PR close? **Note: this targets a feature branch, not main** - Part of #5854.
# Rationale for this change Continuing the remodel # What changes are included in this PR? This begins the process of replacing the current footer parsing code with the new version. As part of this much of the decryption machinery also needed to be changed. # Are these changes tested? Should be covered by existing tests # Are there any user-facing changes? Yes --- parquet/src/file/metadata/mod.rs | 94 +----- parquet/src/file/metadata/reader.rs | 295 +---------------- parquet/src/file/metadata/thrift_gen.rs | 413 +++++++++++++++++++++++- parquet/src/file/serialized_reader.rs | 8 +- parquet/src/parquet_macros.rs | 16 +- parquet/tests/arrow_reader/bad_data.rs | 5 +- 6 files changed, 436 insertions(+), 395 deletions(-) diff --git a/parquet/src/file/metadata/mod.rs b/parquet/src/file/metadata/mod.rs index 8b06fe676308..0c4372e38683 100644 --- a/parquet/src/file/metadata/mod.rs +++ b/parquet/src/file/metadata/mod.rs @@ -97,10 +97,7 @@ pub(crate) mod thrift_gen; mod writer; #[cfg(feature = "encryption")] -use crate::encryption::{ - decrypt::FileDecryptor, - modules::{create_module_aad, ModuleType}, -}; +use crate::encryption::decrypt::FileDecryptor; #[cfg(feature = "encryption")] use crate::file::column_crypto_metadata::{self, ColumnCryptoMetaData}; pub(crate) use crate::file::metadata::memory::HeapSize; @@ -117,8 +114,6 @@ use crate::schema::types::{ ColumnDescPtr, ColumnDescriptor, ColumnPath, SchemaDescPtr, SchemaDescriptor, Type as SchemaType, }; -#[cfg(feature = "encryption")] -use crate::thrift::{TCompactSliceInputProtocol, TSerializable}; use crate::{ basic::BoundaryOrder, errors::{ParquetError, Result}, @@ -684,93 +679,6 @@ impl RowGroupMetaData { self.file_offset } - /// Method to convert from encrypted Thrift. - #[cfg(feature = "encryption")] - fn from_encrypted_thrift( - schema_descr: SchemaDescPtr, - mut rg: crate::format::RowGroup, - decryptor: Option<&FileDecryptor>, - ) -> Result { - if schema_descr.num_columns() != rg.columns.len() { - return Err(general_err!( - "Column count mismatch. Schema has {} columns while Row Group has {}", - schema_descr.num_columns(), - rg.columns.len() - )); - } - let total_byte_size = rg.total_byte_size; - let num_rows = rg.num_rows; - let mut columns = vec![]; - - for (i, (mut c, d)) in rg - .columns - .drain(0..) - .zip(schema_descr.columns()) - .enumerate() - { - // Read encrypted metadata if it's present and we have a decryptor. - if let (true, Some(decryptor)) = (c.encrypted_column_metadata.is_some(), decryptor) { - let column_decryptor = match c.crypto_metadata.as_ref() { - None => { - return Err(general_err!( - "No crypto_metadata is set for column '{}', which has encrypted metadata", - d.path().string() - )); - } - Some(TColumnCryptoMetaData::ENCRYPTIONWITHCOLUMNKEY(crypto_metadata)) => { - let column_name = crypto_metadata.path_in_schema.join("."); - decryptor.get_column_metadata_decryptor( - column_name.as_str(), - crypto_metadata.key_metadata.as_deref(), - )? - } - Some(TColumnCryptoMetaData::ENCRYPTIONWITHFOOTERKEY(_)) => { - decryptor.get_footer_decryptor()? 
- } - }; - - let column_aad = create_module_aad( - decryptor.file_aad(), - ModuleType::ColumnMetaData, - rg.ordinal.unwrap() as usize, - i, - None, - )?; - - let buf = c.encrypted_column_metadata.clone().unwrap(); - let decrypted_cc_buf = column_decryptor - .decrypt(buf.as_slice(), column_aad.as_ref()) - .map_err(|_| { - general_err!( - "Unable to decrypt column '{}', perhaps the column key is wrong?", - d.path().string() - ) - })?; - - let mut prot = TCompactSliceInputProtocol::new(decrypted_cc_buf.as_slice()); - c.meta_data = Some(crate::format::ColumnMetaData::read_from_in_protocol( - &mut prot, - )?); - } - columns.push(ColumnChunkMetaData::from_thrift(d.clone(), c)?); - } - - let sorting_columns = rg.sorting_columns.map(|scs| { - scs.iter() - .map(|sc| sc.into()) - .collect::>() - }); - Ok(RowGroupMetaData { - columns, - num_rows, - sorting_columns, - total_byte_size, - schema_descr, - file_offset: rg.file_offset, - ordinal: rg.ordinal, - }) - } - /// Method to convert from Thrift. pub fn from_thrift( schema_descr: SchemaDescPtr, diff --git a/parquet/src/file/metadata/reader.rs b/parquet/src/file/metadata/reader.rs index 57cc7c57ac66..7ab2db2f7ff3 100644 --- a/parquet/src/file/metadata/reader.rs +++ b/parquet/src/file/metadata/reader.rs @@ -15,32 +15,19 @@ // specific language governing permissions and limitations // under the License. -use std::{io::Read, ops::Range, sync::Arc}; +use std::{io::Read, ops::Range}; -use crate::{ - basic::ColumnOrder, - file::metadata::{FileMetaData, KeyValue}, - parquet_thrift::ThriftCompactInputProtocol, -}; #[cfg(feature = "encryption")] -use crate::{ - encryption::{ - decrypt::{CryptoContext, FileDecryptionProperties, FileDecryptor}, - modules::create_footer_aad, - }, - format::{EncryptionAlgorithm, FileCryptoMetaData as TFileCryptoMetaData}, -}; +use crate::encryption::decrypt::{CryptoContext, FileDecryptionProperties}; +use crate::parquet_thrift::ThriftCompactInputProtocol; use bytes::Bytes; use crate::errors::{ParquetError, Result}; -use crate::file::metadata::{ColumnChunkMetaData, ParquetMetaData, RowGroupMetaData}; +use crate::file::metadata::{ColumnChunkMetaData, ParquetMetaData}; use crate::file::page_index::column_index::ColumnIndexMetaData; use crate::file::page_index::index_reader::{acc_range, decode_column_index, decode_offset_index}; use crate::file::reader::ChunkReader; use crate::file::{FOOTER_SIZE, PARQUET_MAGIC, PARQUET_MAGIC_ENCR_FOOTER}; -use crate::schema::types; -use crate::schema::types::SchemaDescriptor; -use crate::thrift::{TCompactSliceInputProtocol, TSerializable}; #[cfg(all(feature = "async", feature = "arrow"))] use crate::arrow::async_reader::{MetadataFetch, MetadataSuffixFetch}; @@ -960,101 +947,11 @@ impl ParquetMetaDataReader { encrypted_footer: bool, file_decryption_properties: Option<&FileDecryptionProperties>, ) -> Result { - let mut prot = TCompactSliceInputProtocol::new(buf); - let mut file_decryptor = None; - let decrypted_fmd_buf; - - if encrypted_footer { - if let Some(file_decryption_properties) = file_decryption_properties { - let t_file_crypto_metadata: TFileCryptoMetaData = - TFileCryptoMetaData::read_from_in_protocol(&mut prot) - .map_err(|e| general_err!("Could not parse crypto metadata: {}", e))?; - let supply_aad_prefix = match &t_file_crypto_metadata.encryption_algorithm { - EncryptionAlgorithm::AESGCMV1(algo) => algo.supply_aad_prefix, - _ => Some(false), - } - .unwrap_or(false); - if supply_aad_prefix && file_decryption_properties.aad_prefix().is_none() { - return Err(general_err!( - "Parquet file was 
encrypted with an AAD prefix that is not stored in the file, \ - but no AAD prefix was provided in the file decryption properties" - )); - } - let decryptor = get_file_decryptor( - t_file_crypto_metadata.encryption_algorithm, - t_file_crypto_metadata.key_metadata.as_deref(), - file_decryption_properties, - )?; - let footer_decryptor = decryptor.get_footer_decryptor(); - let aad_footer = create_footer_aad(decryptor.file_aad())?; - - decrypted_fmd_buf = footer_decryptor? - .decrypt(prot.as_slice().as_ref(), aad_footer.as_ref()) - .map_err(|_| { - general_err!( - "Provided footer key and AAD were unable to decrypt parquet footer" - ) - })?; - prot = TCompactSliceInputProtocol::new(decrypted_fmd_buf.as_ref()); - - file_decryptor = Some(decryptor); - } else { - return Err(general_err!("Parquet file has an encrypted footer but decryption properties were not provided")); - } - } - - let t_file_metadata = crate::format::FileMetaData::read_from_in_protocol(&mut prot) - .map_err(|e| general_err!("Could not parse metadata: {}", e))?; - let schema = types::from_thrift(&t_file_metadata.schema)?; - let schema_descr = Arc::new(SchemaDescriptor::new(schema)); - - if let (Some(algo), Some(file_decryption_properties)) = ( - t_file_metadata.encryption_algorithm, + super::thrift_gen::parquet_metadata_with_encryption( file_decryption_properties, - ) { - // File has a plaintext footer but encryption algorithm is set - let file_decryptor_value = get_file_decryptor( - algo, - t_file_metadata.footer_signing_key_metadata.as_deref(), - file_decryption_properties, - )?; - if file_decryption_properties.check_plaintext_footer_integrity() && !encrypted_footer { - file_decryptor_value.verify_plaintext_footer_signature(buf)?; - } - file_decryptor = Some(file_decryptor_value); - } - - let mut row_groups = Vec::new(); - for rg in t_file_metadata.row_groups { - let r = RowGroupMetaData::from_encrypted_thrift( - schema_descr.clone(), - rg, - file_decryptor.as_ref(), - )?; - row_groups.push(r); - } - let column_orders = - Self::parse_column_orders(t_file_metadata.column_orders, &schema_descr)?; - - let key_value_metadata = t_file_metadata.key_value_metadata.map(|vkv| { - vkv.into_iter() - .map(|kv| KeyValue::new(kv.key, kv.value)) - .collect::>() - }); - - let file_metadata = FileMetaData::new( - t_file_metadata.version, - t_file_metadata.num_rows, - t_file_metadata.created_by, - key_value_metadata, - schema_descr, - column_orders, - ); - let mut metadata = ParquetMetaData::new(file_metadata, row_groups); - - metadata.with_file_decryptor(file_decryptor); - - Ok(metadata) + encrypted_footer, + buf, + ) } /// Decodes [`ParquetMetaData`] from the provided bytes. 
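For reference, a minimal sketch (not part of the patch) of driving this decoder over an in-memory file, using only APIs referenced elsewhere in this diff; the slice arithmetic assumes the standard layout of thrift-encoded metadata immediately followed by the 8-byte footer:

use parquet::errors::Result;
use parquet::file::metadata::{ParquetMetaData, ParquetMetaDataReader};
use parquet::file::FOOTER_SIZE;

fn metadata_from_file_bytes(file: &[u8]) -> Result<ParquetMetaData> {
    // the last 8 bytes are a 4-byte little-endian metadata length plus the 4-byte magic
    let tail: [u8; FOOTER_SIZE] = file[file.len() - FOOTER_SIZE..]
        .try_into()
        .expect("file shorter than footer");
    let footer = ParquetMetaDataReader::decode_footer_tail(&tail)?;
    let meta_len = footer.metadata_length();
    // the metadata sits immediately before the footer
    let meta_start = file.len() - FOOTER_SIZE - meta_len;
    ParquetMetaDataReader::decode_metadata(&file[meta_start..file.len() - FOOTER_SIZE])
}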
@@ -1065,116 +962,17 @@ impl ParquetMetaDataReader { /// /// [Parquet Spec]: https://github.com/apache/parquet-format#metadata pub fn decode_metadata(buf: &[u8]) -> Result { - let mut prot = TCompactSliceInputProtocol::new(buf); - - let t_file_metadata = crate::format::FileMetaData::read_from_in_protocol(&mut prot) - .map_err(|e| general_err!("Could not parse metadata: {}", e))?; - let schema = types::from_thrift(&t_file_metadata.schema)?; - let schema_descr = Arc::new(SchemaDescriptor::new(schema)); - - let mut row_groups = Vec::new(); - for rg in t_file_metadata.row_groups { - row_groups.push(RowGroupMetaData::from_thrift(schema_descr.clone(), rg)?); - } - let column_orders = - Self::parse_column_orders(t_file_metadata.column_orders, &schema_descr)?; - - let key_value_metadata = t_file_metadata.key_value_metadata.map(|vkv| { - vkv.into_iter() - .map(|kv| KeyValue::new(kv.key, kv.value)) - .collect::>() - }); - - let file_metadata = FileMetaData::new( - t_file_metadata.version, - t_file_metadata.num_rows, - t_file_metadata.created_by, - key_value_metadata, - schema_descr, - column_orders, - ); - - Ok(ParquetMetaData::new(file_metadata, row_groups)) - } - - /// create meta data from thrift encoded bytes - pub fn decode_file_metadata(buf: &[u8]) -> Result { let mut prot = ThriftCompactInputProtocol::new(buf); ParquetMetaData::try_from(&mut prot) } - - /// Parses column orders from Thrift definition. - /// If no column orders are defined, returns `None`. - fn parse_column_orders( - t_column_orders: Option>, - schema_descr: &SchemaDescriptor, - ) -> Result>> { - match t_column_orders { - Some(orders) => { - // Should always be the case - if orders.len() != schema_descr.num_columns() { - return Err(general_err!("Column order length mismatch")); - }; - let mut res = Vec::new(); - for (i, column) in schema_descr.columns().iter().enumerate() { - match orders[i] { - crate::format::ColumnOrder::TYPEORDER(_) => { - let sort_order = ColumnOrder::get_sort_order( - column.logical_type(), - column.converted_type(), - column.physical_type(), - ); - res.push(ColumnOrder::TYPE_DEFINED_ORDER(sort_order)); - } - } - } - Ok(Some(res)) - } - None => Ok(None), - } - } -} - -#[cfg(feature = "encryption")] -fn get_file_decryptor( - encryption_algorithm: EncryptionAlgorithm, - footer_key_metadata: Option<&[u8]>, - file_decryption_properties: &FileDecryptionProperties, -) -> Result { - match encryption_algorithm { - EncryptionAlgorithm::AESGCMV1(algo) => { - let aad_file_unique = algo - .aad_file_unique - .ok_or_else(|| general_err!("AAD unique file identifier is not set"))?; - let aad_prefix = if let Some(aad_prefix) = file_decryption_properties.aad_prefix() { - aad_prefix.clone() - } else { - algo.aad_prefix.unwrap_or_default() - }; - - FileDecryptor::new( - file_decryption_properties, - footer_key_metadata, - aad_file_unique, - aad_prefix, - ) - } - EncryptionAlgorithm::AESGCMCTRV1(_) => Err(nyi_err!( - "The AES_GCM_CTR_V1 encryption algorithm is not yet supported" - )), - } } #[cfg(test)] mod tests { use super::*; use bytes::Bytes; - use zstd::zstd_safe::WriteBuf; - use crate::basic::SortOrder; - use crate::basic::Type; use crate::file::reader::Length; - use crate::schema::types::Type as SchemaType; use crate::util::test_common::file_util::get_test_file; #[test] @@ -1205,61 +1003,6 @@ mod tests { assert!(matches!(err, ParquetError::NeedMoreData(263))); } - #[test] - fn test_metadata_column_orders_parse() { - // Define simple schema, we do not need to provide logical types. 
- let fields = vec![ - Arc::new( - SchemaType::primitive_type_builder("col1", Type::INT32) - .build() - .unwrap(), - ), - Arc::new( - SchemaType::primitive_type_builder("col2", Type::FLOAT) - .build() - .unwrap(), - ), - ]; - let schema = SchemaType::group_type_builder("schema") - .with_fields(fields) - .build() - .unwrap(); - let schema_descr = SchemaDescriptor::new(Arc::new(schema)); - - let t_column_orders = Some(vec![ - crate::format::ColumnOrder::TYPEORDER(Default::default()), - crate::format::ColumnOrder::TYPEORDER(Default::default()), - ]); - - assert_eq!( - ParquetMetaDataReader::parse_column_orders(t_column_orders, &schema_descr).unwrap(), - Some(vec![ - ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED), - ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED) - ]) - ); - - // Test when no column orders are defined. - assert_eq!( - ParquetMetaDataReader::parse_column_orders(None, &schema_descr).unwrap(), - None - ); - } - - #[test] - fn test_metadata_column_orders_len_mismatch() { - let schema = SchemaType::group_type_builder("schema").build().unwrap(); - let schema_descr = SchemaDescriptor::new(Arc::new(schema)); - - let t_column_orders = Some(vec![crate::format::ColumnOrder::TYPEORDER( - Default::default(), - )]); - - let res = ParquetMetaDataReader::parse_column_orders(t_column_orders, &schema_descr); - assert!(res.is_err()); - assert!(format!("{:?}", res.unwrap_err()).contains("Column order length mismatch")); - } - #[test] #[allow(deprecated)] fn test_try_parse() { @@ -1374,27 +1117,6 @@ mod tests { "EOF: Parquet file too small. Size is 1728 but need 1729" ); } - - #[test] - fn test_new_decoder() { - let file = get_test_file("alltypes_tiny_pages.parquet"); - let len = file.len(); - - // read entire file - let bytes = file.get_bytes(0, len as usize).unwrap(); - let mut footer = [0u8; FOOTER_SIZE]; - footer.copy_from_slice(bytes.slice(len as usize - FOOTER_SIZE..).as_slice()); - let tail = ParquetMetaDataReader::decode_footer_tail(&footer).unwrap(); - let meta_len = tail.metadata_length(); - let metadata_bytes = bytes.slice(len as usize - FOOTER_SIZE - meta_len..); - - // get ParquetMetaData - let m = ParquetMetaDataReader::decode_file_metadata(&metadata_bytes).unwrap(); - let m2 = ParquetMetaDataReader::decode_metadata(&metadata_bytes).unwrap(); - - // check that metadatas are equivalent - assert_eq!(m, m2); - } } #[cfg(all(feature = "async", feature = "arrow", test))] @@ -1412,6 +1134,7 @@ mod async_tests { use std::io::{Read, Seek, SeekFrom}; use std::ops::Range; use std::sync::atomic::{AtomicUsize, Ordering}; + use std::sync::Arc; use tempfile::NamedTempFile; use crate::arrow::ArrowWriter; diff --git a/parquet/src/file/metadata/thrift_gen.rs b/parquet/src/file/metadata/thrift_gen.rs index f15a5a6b16d8..06229fb1812f 100644 --- a/parquet/src/file/metadata/thrift_gen.rs +++ b/parquet/src/file/metadata/thrift_gen.rs @@ -20,10 +20,10 @@ use std::io::Write; use std::sync::Arc; -#[cfg(feature = "encryption")] -use crate::file::column_crypto_metadata::ColumnCryptoMetaData; use crate::{ - basic::{ColumnOrder, Compression, ConvertedType, Encoding, LogicalType, Repetition, Type}, + basic::{ + ColumnOrder, Compression, ConvertedType, Encoding, LogicalType, PageType, Repetition, Type, + }, data_type::{ByteArray, FixedLenByteArray, Int96}, errors::{ParquetError, Result}, file::{ @@ -39,9 +39,15 @@ use crate::{ WriteThrift, WriteThriftField, }, schema::types::{parquet_schema_from_array, ColumnDescriptor, SchemaDescriptor}, - thrift_struct, + thrift_struct, thrift_union, 
util::bit_util::FromBytes, }; +#[cfg(feature = "encryption")] +use crate::{ + encryption::decrypt::{FileDecryptionProperties, FileDecryptor}, + file::column_crypto_metadata::ColumnCryptoMetaData, + schema::types::SchemaDescPtr, +}; // this needs to be visible to the schema conversion code thrift_struct!( @@ -60,6 +66,153 @@ pub(crate) struct SchemaElement<'a> { } ); +thrift_struct!( +pub(crate) struct DataPageHeader { + /// Number of values, including NULLs, in this data page. + /// + /// If a OffsetIndex is present, a page must begin at a row + /// boundary (repetition_level = 0). Otherwise, pages may begin + /// within a row (repetition_level > 0). + 1: required i32 num_values + + /// Encoding used for this data page + 2: required Encoding encoding + + /// Encoding used for definition levels + 3: required Encoding definition_level_encoding; + + /// Encoding used for repetition levels + 4: required Encoding repetition_level_encoding; + + // Optional statistics for the data in this page + // page stats are pretty useless...lets ignore them + //5: optional Statistics statistics; +} +); + +thrift_struct!( + pub(crate) struct IndexPageHeader {} +); + +thrift_struct!( +pub(crate) struct DictionaryPageHeader { + /// Number of values in the dictionary + 1: required i32 num_values; + + /// Encoding using this dictionary page + 2: required Encoding encoding + + /// If true, the entries in the dictionary are sorted in ascending order + 3: optional bool is_sorted; +} +); + +thrift_struct!( +pub(crate) struct DataPageHeaderV2 { + /// Number of values, including NULLs, in this data page. + 1: required i32 num_values + /// Number of NULL values, in this data page. + /// Number of non-null = num_values - num_nulls which is also the number of values in the data section + 2: required i32 num_nulls + /// Number of rows in this data page. Every page must begin at a + /// row boundary (repetition_level = 0): rows must **not** be + /// split across page boundaries when using V2 data pages. + 3: required i32 num_rows + /// Encoding used for data in this page + 4: required Encoding encoding + + // repetition levels and definition levels are always using RLE (without size in it) + + /// Length of the definition levels + 5: required i32 definition_levels_byte_length; + /// Length of the repetition levels + 6: required i32 repetition_levels_byte_length; + + /// Whether the values are compressed. + /// Which means the section of the page between + /// definition_levels_byte_length + repetition_levels_byte_length + 1 and compressed_page_size (included) + /// is compressed with the compression_codec. + /// If missing it is considered compressed + 7: optional bool is_compressed = true; + + // Optional statistics for the data in this page + //8: optional Statistics statistics; +} +); + +thrift_struct!( +#[allow(dead_code)] +pub(crate) struct PageHeader { + /// the type of the page: indicates which of the *_header fields is set + 1: required PageType type_ + + /// Uncompressed page size in bytes (not including this header) + 2: required i32 uncompressed_page_size + + /// Compressed (and potentially encrypted) page size in bytes, not including this header + 3: required i32 compressed_page_size + + /// The 32-bit CRC checksum for the page, to be be calculated as follows: + 4: optional i32 crc + + // Headers for page specific data. One only will be set. 
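+ // (Note that Parquet models this as four optional fields rather than a Thrift union: + // `type_` declares which header follows, and exactly one of fields 5-8 is set in a + // valid file.)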
+ 5: optional DataPageHeader data_page_header; + 6: optional IndexPageHeader index_page_header; + 7: optional DictionaryPageHeader dictionary_page_header; + 8: optional DataPageHeaderV2 data_page_header_v2; +} +); + +thrift_struct!( +pub(crate) struct AesGcmV1<'a> { + /// AAD prefix + 1: optional binary<'a> aad_prefix + + /// Unique file identifier part of AAD suffix + 2: optional binary<'a> aad_file_unique + + /// In files encrypted with AAD prefix without storing it, + /// readers must supply the prefix + 3: optional bool supply_aad_prefix +} +); + +thrift_struct!( +pub(crate) struct AesGcmCtrV1<'a> { + /// AAD prefix + 1: optional binary<'a> aad_prefix + + /// Unique file identifier part of AAD suffix + 2: optional binary<'a> aad_file_unique + + /// In files encrypted with AAD prefix without storing it, + /// readers must supply the prefix + 3: optional bool supply_aad_prefix +} +); + +thrift_union!( +union EncryptionAlgorithm<'a> { + 1: (AesGcmV1<'a>) AES_GCM_V1 + 2: (AesGcmCtrV1<'a>) AES_GCM_CTR_V1 +} +); + +#[cfg(feature = "encryption")] +thrift_struct!( +/// Crypto metadata for files with encrypted footer +pub(crate) struct FileCryptoMetaData<'a> { + /// Encryption algorithm. This field is only used for files + /// with encrypted footer. Files with plaintext footer store algorithm id + /// inside footer (FileMetaData structure). + 1: required EncryptionAlgorithm<'a> encryption_algorithm + + /** Retrieval metadata of key used for encryption of footer, + * and (possibly) columns **/ + 2: optional binary<'a> key_metadata +} +); + // the following are only used internally so are private thrift_struct!( struct FileMetaData<'a> { @@ -71,8 +224,8 @@ struct FileMetaData<'a> { 5: optional list key_value_metadata 6: optional string created_by 7: optional list column_orders; - //8: optional EncryptionAlgorithm encryption_algorithm - //9: optional binary footer_signing_key_metadata + 8: optional EncryptionAlgorithm<'a> encryption_algorithm + 9: optional binary<'a> footer_signing_key_metadata } ); @@ -172,7 +325,7 @@ struct SizeStatistics { ); thrift_struct!( -struct Statistics<'a> { +pub(crate) struct Statistics<'a> { 1: optional binary<'a> max; 2: optional binary<'a> min; 3: optional i64 null_count; @@ -304,7 +457,7 @@ fn convert_column( Ok(result) } -fn convert_stats( +pub(crate) fn convert_stats( physical_type: Type, thrift_stats: Option, ) -> Result> { @@ -453,6 +606,250 @@ fn convert_stats( }) } +#[cfg(feature = "encryption")] +fn row_group_from_encrypted_thrift( + mut rg: RowGroup, + schema_descr: SchemaDescPtr, + decryptor: Option<&FileDecryptor>, +) -> Result { + if schema_descr.num_columns() != rg.columns.len() { + return Err(general_err!( + "Column count mismatch. Schema has {} columns while Row Group has {}", + schema_descr.num_columns(), + rg.columns.len() + )); + } + let total_byte_size = rg.total_byte_size; + let num_rows = rg.num_rows; + let mut columns = vec![]; + + for (i, (mut c, d)) in rg + .columns + .drain(0..) + .zip(schema_descr.columns()) + .enumerate() + { + // Read encrypted metadata if it's present and we have a decryptor. 
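+ // Each column's ColumnMetaData was encrypted by the writer with AES-GCM under a + // module AAD derived from the file AAD, the ColumnMetaData module type, and this + // row group and column ordinal; the identical AAD must be rebuilt below for + // decryption to authenticate.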
+ if let (true, Some(decryptor)) = (c.encrypted_column_metadata.is_some(), decryptor) { + let column_decryptor = match c.crypto_metadata.as_ref() { + None => { + return Err(general_err!( + "No crypto_metadata is set for column '{}', which has encrypted metadata", + d.path().string() + )); + } + Some(ColumnCryptoMetaData::ENCRYPTION_WITH_COLUMN_KEY(crypto_metadata)) => { + let column_name = crypto_metadata.path_in_schema.join("."); + decryptor.get_column_metadata_decryptor( + column_name.as_str(), + crypto_metadata.key_metadata.as_deref(), + )? + } + Some(ColumnCryptoMetaData::ENCRYPTION_WITH_FOOTER_KEY) => { + decryptor.get_footer_decryptor()? + } + }; + + let column_aad = crate::encryption::modules::create_module_aad( + decryptor.file_aad(), + crate::encryption::modules::ModuleType::ColumnMetaData, + rg.ordinal.unwrap() as usize, + i, + None, + )?; + + let buf = c.encrypted_column_metadata.unwrap(); + let decrypted_cc_buf = + column_decryptor + .decrypt(buf, column_aad.as_ref()) + .map_err(|_| { + general_err!( + "Unable to decrypt column '{}', perhaps the column key is wrong?", + d.path().string() + ) + })?; + + let mut prot = ThriftCompactInputProtocol::new(decrypted_cc_buf.as_slice()); + let col_meta = ColumnMetaData::try_from(&mut prot)?; + c.meta_data = Some(col_meta); + columns.push(convert_column(c, d.clone())?); + } else { + columns.push(convert_column(c, d.clone())?); + } + } + + let sorting_columns = rg.sorting_columns; + let file_offset = rg.file_offset; + let ordinal = rg.ordinal; + + Ok(RowGroupMetaData { + columns, + num_rows, + sorting_columns, + total_byte_size, + schema_descr, + file_offset, + ordinal, + }) +} + +#[cfg(feature = "encryption")] +pub(crate) fn parquet_metadata_with_encryption( + file_decryption_properties: Option<&FileDecryptionProperties>, + encrypted_footer: bool, + buf: &[u8], +) -> Result { + let mut prot = ThriftCompactInputProtocol::new(buf); + let mut file_decryptor = None; + let decrypted_fmd_buf; + + if encrypted_footer { + if let Some(file_decryption_properties) = file_decryption_properties { + let t_file_crypto_metadata: FileCryptoMetaData = + FileCryptoMetaData::try_from(&mut prot) + .map_err(|e| general_err!("Could not parse crypto metadata: {}", e))?; + let supply_aad_prefix = match &t_file_crypto_metadata.encryption_algorithm { + EncryptionAlgorithm::AES_GCM_V1(algo) => algo.supply_aad_prefix, + _ => Some(false), + } + .unwrap_or(false); + if supply_aad_prefix && file_decryption_properties.aad_prefix().is_none() { + return Err(general_err!( + "Parquet file was encrypted with an AAD prefix that is not stored in the file, \ + but no AAD prefix was provided in the file decryption properties" + )); + } + let decryptor = get_file_decryptor( + t_file_crypto_metadata.encryption_algorithm, + t_file_crypto_metadata.key_metadata, + file_decryption_properties, + )?; + let footer_decryptor = decryptor.get_footer_decryptor(); + let aad_footer = crate::encryption::modules::create_footer_aad(decryptor.file_aad())?; + + decrypted_fmd_buf = footer_decryptor? 
+ .decrypt(prot.as_slice().as_ref(), aad_footer.as_ref()) + .map_err(|_| { + general_err!( + "Provided footer key and AAD were unable to decrypt parquet footer" + ) + })?; + prot = ThriftCompactInputProtocol::new(decrypted_fmd_buf.as_ref()); + + file_decryptor = Some(decryptor); + } else { + return Err(general_err!( + "Parquet file has an encrypted footer but decryption properties were not provided" + )); + } + } + + let file_meta = super::thrift_gen::FileMetaData::try_from(&mut prot) + .map_err(|e| general_err!("Could not parse metadata: {}", e))?; + + let version = file_meta.version; + let num_rows = file_meta.num_rows; + let created_by = file_meta.created_by.map(|c| c.to_owned()); + let key_value_metadata = file_meta.key_value_metadata; + + let val = parquet_schema_from_array(file_meta.schema)?; + let schema_descr = Arc::new(SchemaDescriptor::new(val)); + + if let (Some(algo), Some(file_decryption_properties)) = + (file_meta.encryption_algorithm, file_decryption_properties) + { + // File has a plaintext footer but encryption algorithm is set + let file_decryptor_value = get_file_decryptor( + algo, + file_meta.footer_signing_key_metadata, + file_decryption_properties, + )?; + if file_decryption_properties.check_plaintext_footer_integrity() && !encrypted_footer { + file_decryptor_value.verify_plaintext_footer_signature(buf)?; + } + file_decryptor = Some(file_decryptor_value); + } + + // decrypt column chunk info + let mut row_groups = Vec::with_capacity(file_meta.row_groups.len()); + for rg in file_meta.row_groups { + let r = row_group_from_encrypted_thrift(rg, schema_descr.clone(), file_decryptor.as_ref())?; + row_groups.push(r); + } + + // need to map read column orders to actual values based on the schema + if file_meta + .column_orders + .as_ref() + .is_some_and(|cos| cos.len() != schema_descr.num_columns()) + { + return Err(general_err!("Column order length mismatch")); + } + + let column_orders = file_meta.column_orders.map(|cos| { + let mut res = Vec::with_capacity(cos.len()); + for (i, column) in schema_descr.columns().iter().enumerate() { + match cos[i] { + ColumnOrder::TYPE_DEFINED_ORDER(_) => { + let sort_order = ColumnOrder::get_sort_order( + column.logical_type(), + column.converted_type(), + column.physical_type(), + ); + res.push(ColumnOrder::TYPE_DEFINED_ORDER(sort_order)); + } + _ => res.push(cos[i]), + } + } + res + }); + + let fmd = crate::file::metadata::FileMetaData::new( + version, + num_rows, + created_by, + key_value_metadata, + schema_descr, + column_orders, + ); + let mut metadata = ParquetMetaData::new(fmd, row_groups); + + metadata.with_file_decryptor(file_decryptor); + + Ok(metadata) +} + +#[cfg(feature = "encryption")] +pub(super) fn get_file_decryptor( + encryption_algorithm: EncryptionAlgorithm, + footer_key_metadata: Option<&[u8]>, + file_decryption_properties: &FileDecryptionProperties, +) -> Result { + match encryption_algorithm { + EncryptionAlgorithm::AES_GCM_V1(algo) => { + let aad_file_unique = algo + .aad_file_unique + .ok_or_else(|| general_err!("AAD unique file identifier is not set"))?; + let aad_prefix = if let Some(aad_prefix) = file_decryption_properties.aad_prefix() { + aad_prefix.clone() + } else { + algo.aad_prefix.map(|v| v.to_vec()).unwrap_or_default() + }; + let aad_file_unique = aad_file_unique.to_vec(); + + FileDecryptor::new( + file_decryption_properties, + footer_key_metadata, + aad_file_unique, + aad_prefix, + ) + } + EncryptionAlgorithm::AES_GCM_CTR_V1(_) => Err(nyi_err!( + "The AES_GCM_CTR_V1 encryption algorithm is not yet 
supported" + )), + } +} + /// Create ParquetMetaData from thrift input. Note that this only decodes the file metadata in /// the Parquet footer. Page indexes will need to be added later. impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for ParquetMetaData { diff --git a/parquet/src/file/serialized_reader.rs b/parquet/src/file/serialized_reader.rs index 5308825b0976..728598045315 100644 --- a/parquet/src/file/serialized_reader.rs +++ b/parquet/src/file/serialized_reader.rs @@ -1875,9 +1875,15 @@ mod tests { 80, 65, 82, 49, ]; let ret = SerializedFileReader::new(Bytes::copy_from_slice(&data)); + #[cfg(feature = "encryption")] assert_eq!( ret.err().unwrap().to_string(), - "Parquet error: Could not parse metadata: bad data" + "Parquet error: Could not parse metadata: Parquet error: Received empty union from remote ColumnOrder" + ); + #[cfg(not(feature = "encryption"))] + assert_eq!( + ret.err().unwrap().to_string(), + "Parquet error: Received empty union from remote ColumnOrder" ); } diff --git a/parquet/src/parquet_macros.rs b/parquet/src/parquet_macros.rs index eb523a6982a0..939f3cb339ab 100644 --- a/parquet/src/parquet_macros.rs +++ b/parquet/src/parquet_macros.rs @@ -185,17 +185,17 @@ macro_rules! thrift_union_all_empty { #[macro_export] #[allow(clippy::crate_in_macro_def)] macro_rules! thrift_union { - ($(#[$($def_attrs:tt)*])* union $identifier:ident { $($(#[$($field_attrs:tt)*])* $field_id:literal : $( ( $field_type:ident $(< $element_type:ident >)? ) )? $field_name:ident $(;)?)* }) => { + ($(#[$($def_attrs:tt)*])* union $identifier:ident $(< $lt:lifetime >)? { $($(#[$($field_attrs:tt)*])* $field_id:literal : $( ( $field_type:ident $(< $element_type:ident >)? $(< $field_lt:lifetime >)?) )? $field_name:ident $(;)?)* }) => { $(#[cfg_attr(not(doctest), $($def_attrs)*)])* #[derive(Clone, Debug, Eq, PartialEq)] #[allow(non_camel_case_types)] #[allow(non_snake_case)] #[allow(missing_docs)] - pub enum $identifier { - $($(#[cfg_attr(not(doctest), $($field_attrs)*)])* $field_name $( ( $crate::__thrift_union_type!{$field_type $($element_type)?} ) )?),* + pub enum $identifier $(<$lt>)? { + $($(#[cfg_attr(not(doctest), $($field_attrs)*)])* $field_name $( ( $crate::__thrift_union_type!{$field_type $($field_lt)? $($element_type)?} ) )?),* } - impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for $identifier { + impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for $identifier $(<$lt>)? { type Error = ParquetError; fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { @@ -224,7 +224,7 @@ macro_rules! thrift_union { } } - impl WriteThrift for $identifier { + impl $(<$lt>)? WriteThrift for $identifier $(<$lt>)? { const ELEMENT_TYPE: ElementType = ElementType::Struct; fn write_thrift(&self, writer: &mut ThriftCompactOutputProtocol) -> Result<()> { @@ -236,7 +236,7 @@ macro_rules! thrift_union { } } - impl WriteThriftField for $identifier { + impl $(<$lt>)? WriteThriftField for $identifier $(<$lt>)? { fn write_thrift_field(&self, writer: &mut ThriftCompactOutputProtocol, field_id: i16, last_field_id: i16) -> Result { writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?; self.write_thrift(writer)?; @@ -316,6 +316,7 @@ macro_rules! 
thrift_struct { #[allow(unused_assignments)] fn write_thrift(&self, writer: &mut ThriftCompactOutputProtocol) -> Result<()> { + #[allow(unused_mut, unused_variables)] let mut last_field_id = 0i16; $($crate::__thrift_write_required_or_optional_field!($required_or_optional $field_name, $field_id, $field_type, self, writer, last_field_id);)* writer.write_struct_end() @@ -470,6 +471,9 @@ macro_rules! __thrift_field_type { #[doc(hidden)] #[macro_export] macro_rules! __thrift_union_type { + (binary $lt:lifetime) => { &$lt [u8] }; + (string $lt:lifetime) => { &$lt str }; + ($field_type:ident $lt:lifetime) => { $field_type<$lt> }; ($field_type:ident) => { $field_type }; (list $field_type:ident) => { Vec<$field_type> }; } diff --git a/parquet/tests/arrow_reader/bad_data.rs b/parquet/tests/arrow_reader/bad_data.rs index 619bbb862fe1..ecf449a7ce61 100644 --- a/parquet/tests/arrow_reader/bad_data.rs +++ b/parquet/tests/arrow_reader/bad_data.rs @@ -80,10 +80,13 @@ fn test_invalid_files() { #[test] fn test_parquet_1481() { let err = read_file("PARQUET-1481.parquet").unwrap_err(); + #[cfg(feature = "encryption")] assert_eq!( err.to_string(), - "Parquet error: Unexpected parquet Type: -7" + "Parquet error: Could not parse metadata: Parquet error: Unexpected Type -7" ); + #[cfg(not(feature = "encryption"))] + assert_eq!(err.to_string(), "Parquet error: Unexpected Type -7"); } #[test] From c327d7f44ab53ca879ee30783abcdd1da5d072bd Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Wed, 17 Sep 2025 12:12:31 -0700 Subject: [PATCH 10/15] [thrift-remodel] Rework thrift reader API (#8341) # Which issue does this PR close? **Note: this targets a feature branch, not main** - Part of #5854. # Rationale for this change As I started on decoding thrift page headers, I found that the way I had been going was no longer going to work. This PR begins the process of abstracting the thrift reader to allow for other implementations. # What changes are included in this PR? In addition to reworking the reader itself, this PR moves away from the previous `TryFrom` approach and instead adds a `ReadThrift` trait. # Are these changes tested? Should be covered by existing tests # Are there any user-facing changes? 
Yes --- parquet/src/basic.rs | 52 +-- parquet/src/file/column_crypto_metadata.rs | 4 +- parquet/src/file/metadata/mod.rs | 4 +- parquet/src/file/metadata/reader.rs | 6 +- parquet/src/file/metadata/thrift_gen.rs | 24 +- parquet/src/file/page_encoding_stats.rs | 6 +- parquet/src/file/page_index/index_reader.rs | 14 +- parquet/src/file/page_index/offset_index.rs | 12 +- parquet/src/parquet_macros.rs | 91 ++--- parquet/src/parquet_thrift.rs | 414 ++++++++++---------- 10 files changed, 311 insertions(+), 316 deletions(-) diff --git a/parquet/src/basic.rs b/parquet/src/basic.rs index 5fffb56cdf74..44fe66aff7e3 100644 --- a/parquet/src/basic.rs +++ b/parquet/src/basic.rs @@ -26,8 +26,8 @@ use std::{fmt, str}; pub use crate::compression::{BrotliLevel, GzipLevel, ZstdLevel}; use crate::parquet_thrift::{ - ElementType, FieldType, ThriftCompactInputProtocol, ThriftCompactOutputProtocol, WriteThrift, - WriteThriftField, + ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, ThriftCompactOutputProtocol, + WriteThrift, WriteThriftField, }; use crate::{thrift_enum, thrift_struct, thrift_union_all_empty}; @@ -165,9 +165,8 @@ pub enum ConvertedType { INTERVAL, } -impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for ConvertedType { - type Error = ParquetError; - fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { +impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for ConvertedType { + fn read_thrift(prot: &mut R) -> Result { let val = prot.read_i32()?; Ok(match val { 0 => Self::UTF8, @@ -361,12 +360,9 @@ pub enum LogicalType { }, } -impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for LogicalType { - type Error = ParquetError; - fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { - prot.read_struct_begin()?; - - let field_ident = prot.read_field_begin()?; +impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for LogicalType { + fn read_thrift(prot: &mut R) -> Result { + let field_ident = prot.read_field_begin(0)?; if field_ident.field_type == FieldType::Stop { return Err(general_err!("received empty union from remote LogicalType")); } @@ -388,7 +384,7 @@ impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for LogicalType { Self::Enum } 5 => { - let val = DecimalType::try_from(&mut *prot)?; + let val = DecimalType::read_thrift(&mut *prot)?; Self::Decimal { scale: val.scale, precision: val.precision, @@ -399,21 +395,21 @@ impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for LogicalType { Self::Date } 7 => { - let val = TimeType::try_from(&mut *prot)?; + let val = TimeType::read_thrift(&mut *prot)?; Self::Time { is_adjusted_to_u_t_c: val.is_adjusted_to_u_t_c, unit: val.unit, } } 8 => { - let val = TimestampType::try_from(&mut *prot)?; + let val = TimestampType::read_thrift(&mut *prot)?; Self::Timestamp { is_adjusted_to_u_t_c: val.is_adjusted_to_u_t_c, unit: val.unit, } } 10 => { - let val = IntType::try_from(&mut *prot)?; + let val = IntType::read_thrift(&mut *prot)?; Self::Integer { is_signed: val.is_signed, bit_width: val.bit_width, @@ -440,19 +436,19 @@ impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for LogicalType { Self::Float16 } 16 => { - let val = VariantType::try_from(&mut *prot)?; + let val = VariantType::read_thrift(&mut *prot)?; Self::Variant { specification_version: val.specification_version, } } 17 => { - let val = GeometryType::try_from(&mut *prot)?; + let val = GeometryType::read_thrift(&mut *prot)?; Self::Geometry { crs: val.crs.map(|s| s.to_owned()), } } 18 => { - let val = GeographyType::try_from(&mut 
*prot)?; + let val = GeographyType::read_thrift(&mut *prot)?; Self::Geography { crs: val.crs.map(|s| s.to_owned()), algorithm: val.algorithm, @@ -465,13 +461,12 @@ impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for LogicalType { } } }; - let field_ident = prot.read_field_begin()?; + let field_ident = prot.read_field_begin(field_ident.id)?; if field_ident.field_type != FieldType::Stop { return Err(general_err!( "Received multiple fields for union from remote LogicalType" )); } - prot.read_struct_end()?; Ok(ret) } } @@ -756,9 +751,8 @@ pub enum Compression { LZ4_RAW, } -impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for Compression { - type Error = ParquetError; - fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { +impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for Compression { + fn read_thrift(prot: &mut R) -> Result { let val = prot.read_i32()?; Ok(match val { 0 => Self::UNCOMPRESSED, @@ -1124,12 +1118,9 @@ impl ColumnOrder { } } -impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for ColumnOrder { - type Error = ParquetError; - - fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { - prot.read_struct_begin()?; - let field_ident = prot.read_field_begin()?; +impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for ColumnOrder { + fn read_thrift(prot: &mut R) -> Result { + let field_ident = prot.read_field_begin(0)?; if field_ident.field_type == FieldType::Stop { return Err(general_err!("Received empty union from remote ColumnOrder")); } @@ -1144,13 +1135,12 @@ impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for ColumnOrder { Self::UNKNOWN } }; - let field_ident = prot.read_field_begin()?; + let field_ident = prot.read_field_begin(field_ident.id)?; if field_ident.field_type != FieldType::Stop { return Err(general_err!( "Received multiple fields for union from remote ColumnOrder" )); } - prot.read_struct_end()?; Ok(ret) } } diff --git a/parquet/src/file/column_crypto_metadata.rs b/parquet/src/file/column_crypto_metadata.rs index 5bba07357947..6a538bd42bc0 100644 --- a/parquet/src/file/column_crypto_metadata.rs +++ b/parquet/src/file/column_crypto_metadata.rs @@ -26,8 +26,8 @@ use crate::format::{ EncryptionWithFooterKey as TEncryptionWithFooterKey, }; use crate::parquet_thrift::{ - ElementType, FieldType, ThriftCompactInputProtocol, ThriftCompactOutputProtocol, WriteThrift, - WriteThriftField, + read_thrift_vec, ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, + ThriftCompactOutputProtocol, WriteThrift, WriteThriftField, }; use crate::{thrift_struct, thrift_union}; diff --git a/parquet/src/file/metadata/mod.rs b/parquet/src/file/metadata/mod.rs index d23d46a33b84..95e9a48b46f3 100644 --- a/parquet/src/file/metadata/mod.rs +++ b/parquet/src/file/metadata/mod.rs @@ -121,8 +121,8 @@ use crate::{ use crate::{ basic::{ColumnOrder, Compression, Encoding, Type}, parquet_thrift::{ - ElementType, FieldType, ThriftCompactInputProtocol, ThriftCompactOutputProtocol, - WriteThrift, WriteThriftField, + ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, + ThriftCompactOutputProtocol, WriteThrift, WriteThriftField, }, }; use crate::{ diff --git a/parquet/src/file/metadata/reader.rs b/parquet/src/file/metadata/reader.rs index 46022b459da0..73c6a8ee40d0 100644 --- a/parquet/src/file/metadata/reader.rs +++ b/parquet/src/file/metadata/reader.rs @@ -19,7 +19,7 @@ use std::{io::Read, ops::Range}; #[cfg(feature = "encryption")] use crate::encryption::decrypt::{CryptoContext, FileDecryptionProperties}; -use 
crate::parquet_thrift::ThriftCompactInputProtocol; +use crate::parquet_thrift::{ReadThrift, ThriftSliceInputProtocol}; use bytes::Bytes; use crate::errors::{ParquetError, Result}; @@ -962,8 +962,8 @@ impl ParquetMetaDataReader { /// /// [Parquet Spec]: https://github.com/apache/parquet-format#metadata pub fn decode_metadata(buf: &[u8]) -> Result { - let mut prot = ThriftCompactInputProtocol::new(buf); - ParquetMetaData::try_from(&mut prot) + let mut prot = ThriftSliceInputProtocol::new(buf); + ParquetMetaData::read_thrift(&mut prot) } } diff --git a/parquet/src/file/metadata/thrift_gen.rs b/parquet/src/file/metadata/thrift_gen.rs index 06229fb1812f..b656bacc8c7d 100644 --- a/parquet/src/file/metadata/thrift_gen.rs +++ b/parquet/src/file/metadata/thrift_gen.rs @@ -35,8 +35,8 @@ use crate::{ statistics::ValueStatistics, }, parquet_thrift::{ - ElementType, FieldType, ThriftCompactInputProtocol, ThriftCompactOutputProtocol, - WriteThrift, WriteThriftField, + read_thrift_vec, ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, + ThriftCompactOutputProtocol, WriteThrift, WriteThriftField, }, schema::types::{parquet_schema_from_array, ColumnDescriptor, SchemaDescriptor}, thrift_struct, thrift_union, @@ -46,6 +46,7 @@ use crate::{ use crate::{ encryption::decrypt::{FileDecryptionProperties, FileDecryptor}, file::column_crypto_metadata::ColumnCryptoMetaData, + parquet_thrift::ThriftSliceInputProtocol, schema::types::SchemaDescPtr, }; @@ -669,8 +670,8 @@ fn row_group_from_encrypted_thrift( ) })?; - let mut prot = ThriftCompactInputProtocol::new(decrypted_cc_buf.as_slice()); - let col_meta = ColumnMetaData::try_from(&mut prot)?; + let mut prot = ThriftSliceInputProtocol::new(decrypted_cc_buf.as_slice()); + let col_meta = ColumnMetaData::read_thrift(&mut prot)?; c.meta_data = Some(col_meta); columns.push(convert_column(c, d.clone())?); } else { @@ -699,14 +700,14 @@ pub(crate) fn parquet_metadata_with_encryption( encrypted_footer: bool, buf: &[u8], ) -> Result { - let mut prot = ThriftCompactInputProtocol::new(buf); + let mut prot = ThriftSliceInputProtocol::new(buf); let mut file_decryptor = None; let decrypted_fmd_buf; if encrypted_footer { if let Some(file_decryption_properties) = file_decryption_properties { let t_file_crypto_metadata: FileCryptoMetaData = - FileCryptoMetaData::try_from(&mut prot) + FileCryptoMetaData::read_thrift(&mut prot) .map_err(|e| general_err!("Could not parse crypto metadata: {}", e))?; let supply_aad_prefix = match &t_file_crypto_metadata.encryption_algorithm { EncryptionAlgorithm::AES_GCM_V1(algo) => algo.supply_aad_prefix, @@ -734,7 +735,7 @@ pub(crate) fn parquet_metadata_with_encryption( "Provided footer key and AAD were unable to decrypt parquet footer" ) })?; - prot = ThriftCompactInputProtocol::new(decrypted_fmd_buf.as_ref()); + prot = ThriftSliceInputProtocol::new(decrypted_fmd_buf.as_ref()); file_decryptor = Some(decryptor); } else { @@ -744,7 +745,7 @@ pub(crate) fn parquet_metadata_with_encryption( } } - let file_meta = super::thrift_gen::FileMetaData::try_from(&mut prot) + let file_meta = super::thrift_gen::FileMetaData::read_thrift(&mut prot) .map_err(|e| general_err!("Could not parse metadata: {}", e))?; let version = file_meta.version; @@ -852,10 +853,9 @@ pub(super) fn get_file_decryptor( /// Create ParquetMetaData from thrift input. Note that this only decodes the file metadata in /// the Parquet footer. Page indexes will need to be added later. 
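/// Page indexes are decoded separately (see `page_index::index_reader`).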
-impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for ParquetMetaData { - type Error = ParquetError; - fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { - let file_meta = super::thrift_gen::FileMetaData::try_from(prot)?; +impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for ParquetMetaData { + fn read_thrift(prot: &mut R) -> Result { + let file_meta = super::thrift_gen::FileMetaData::read_thrift(prot)?; let version = file_meta.version; let num_rows = file_meta.num_rows; diff --git a/parquet/src/file/page_encoding_stats.rs b/parquet/src/file/page_encoding_stats.rs index 2d433dc9b3f1..934e177de0da 100644 --- a/parquet/src/file/page_encoding_stats.rs +++ b/parquet/src/file/page_encoding_stats.rs @@ -20,10 +20,10 @@ use std::io::Write; use crate::basic::{Encoding, PageType}; -use crate::errors::{ParquetError, Result}; +use crate::errors::Result; use crate::parquet_thrift::{ - ElementType, FieldType, ThriftCompactInputProtocol, ThriftCompactOutputProtocol, WriteThrift, - WriteThriftField, + ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, ThriftCompactOutputProtocol, + WriteThrift, WriteThriftField, }; use crate::thrift_struct; diff --git a/parquet/src/file/page_index/index_reader.rs b/parquet/src/file/page_index/index_reader.rs index e9cf119224c9..3db597954e6c 100644 --- a/parquet/src/file/page_index/index_reader.rs +++ b/parquet/src/file/page_index/index_reader.rs @@ -27,8 +27,8 @@ use crate::file::page_index::column_index::{ use crate::file::page_index::offset_index::OffsetIndexMetaData; use crate::file::reader::ChunkReader; use crate::parquet_thrift::{ - ElementType, FieldType, ThriftCompactInputProtocol, ThriftCompactOutputProtocol, WriteThrift, - WriteThriftField, + read_thrift_vec, ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, + ThriftCompactOutputProtocol, ThriftSliceInputProtocol, WriteThrift, WriteThriftField, }; use crate::thrift_struct; use std::io::Write; @@ -136,15 +136,15 @@ pub fn read_offset_indexes( } pub(crate) fn decode_offset_index(data: &[u8]) -> Result { - let mut prot = ThriftCompactInputProtocol::new(data); + let mut prot = ThriftSliceInputProtocol::new(data); // Try to read fast-path first. If that fails, fall back to slower but more robust // decoder. 
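// The fast path assumes the encoder wrote fields in order with field-id deltas of 1 // and no boolean fields; if it did not, `try_from_fast` returns an error and we // re-parse the same buffer with the general decoder.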
match OffsetIndexMetaData::try_from_fast(&mut prot) { Ok(offset_index) => Ok(offset_index), Err(_) => { - prot = ThriftCompactInputProtocol::new(data); - OffsetIndexMetaData::try_from(&mut prot) + prot = ThriftSliceInputProtocol::new(data); + OffsetIndexMetaData::read_thrift(&mut prot) } } } @@ -166,8 +166,8 @@ pub(crate) fn decode_column_index( data: &[u8], column_type: Type, ) -> Result { - let mut prot = ThriftCompactInputProtocol::new(data); - let index = ThriftColumnIndex::try_from(&mut prot)?; + let mut prot = ThriftSliceInputProtocol::new(data); + let index = ThriftColumnIndex::read_thrift(&mut prot)?; let index = match column_type { Type::BOOLEAN => { diff --git a/parquet/src/file/page_index/offset_index.rs b/parquet/src/file/page_index/offset_index.rs index ac2620af09d8..2153b8ed3009 100644 --- a/parquet/src/file/page_index/offset_index.rs +++ b/parquet/src/file/page_index/offset_index.rs @@ -22,8 +22,8 @@ use std::io::Write; use crate::parquet_thrift::{ - ElementType, FieldType, ThriftCompactInputProtocol, ThriftCompactOutputProtocol, WriteThrift, - WriteThriftField, + read_thrift_vec, ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, + ThriftCompactOutputProtocol, WriteThrift, WriteThriftField, }; use crate::{ errors::{ParquetError, Result}, @@ -113,7 +113,9 @@ impl OffsetIndexMetaData { // Fast-path read of offset index. This works because we expect all field deltas to be 1, // and there's no nesting beyond PageLocation, so no need to save the last field id. Like // read_page_locations(), this will fail if absolute field id's are used. - pub(super) fn try_from_fast<'a>(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { + pub(super) fn try_from_fast<'a, R: ThriftCompactInputProtocol<'a>>( + prot: &mut R, + ) -> Result { // Offset index is a struct with 2 fields. First field is an array of PageLocations, // the second an optional array of i64. @@ -140,7 +142,7 @@ impl OffsetIndexMetaData { "encountered unknown field while reading OffsetIndex" )); } - let vec = Vec::::try_from(&mut *prot)?; + let vec = read_thrift_vec::(&mut *prot)?; unencoded_byte_array_data_bytes = Some(vec); // this one should be Stop @@ -164,7 +166,7 @@ impl OffsetIndexMetaData { // Note: this will fail if the fields are either out of order, or if a suboptimal // encoder doesn't use field deltas. -fn read_page_location<'a>(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { +fn read_page_location<'a, R: ThriftCompactInputProtocol<'a>>(prot: &mut R) -> Result { // there are 3 fields, all mandatory, so all field deltas should be 1 let (field_type, delta) = prot.read_field_header()?; if delta != 1 || field_type != FieldType::I64 as u8 { diff --git a/parquet/src/parquet_macros.rs b/parquet/src/parquet_macros.rs index 939f3cb339ab..889e5fafef60 100644 --- a/parquet/src/parquet_macros.rs +++ b/parquet/src/parquet_macros.rs @@ -37,10 +37,9 @@ macro_rules! thrift_enum { $($(#[cfg_attr(not(doctest), $($field_attrs)*)])* $field_name = $field_value,)* } - impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for $identifier { - type Error = ParquetError; + impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for $identifier { #[allow(deprecated)] - fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { + fn read_thrift(prot: &mut R) -> Result { let val = prot.read_i32()?; match val { $($field_value => Ok(Self::$field_name),)* @@ -109,12 +108,9 @@ macro_rules! 
thrift_union_all_empty { $($(#[cfg_attr(not(doctest), $($field_attrs)*)])* $field_name),* } - impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for $identifier { - type Error = ParquetError; - - fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { - prot.read_struct_begin()?; - let field_ident = prot.read_field_begin()?; + impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for $identifier { + fn read_thrift(prot: &mut R) -> Result { + let field_ident = prot.read_field_begin(0)?; if field_ident.field_type == FieldType::Stop { return Err(general_err!("Received empty union from remote {}", stringify!($identifier))); } @@ -128,13 +124,12 @@ macro_rules! thrift_union_all_empty { return Err(general_err!("Unexpected {} {}", stringify!($identifier), field_ident.id)); } }; - let field_ident = prot.read_field_begin()?; + let field_ident = prot.read_field_begin(field_ident.id)?; if field_ident.field_type != FieldType::Stop { return Err(general_err!( "Received multiple fields for union from remote {}", stringify!($identifier) )); } - prot.read_struct_end()?; Ok(ret) } } @@ -195,12 +190,9 @@ macro_rules! thrift_union { $($(#[cfg_attr(not(doctest), $($field_attrs)*)])* $field_name $( ( $crate::__thrift_union_type!{$field_type $($field_lt)? $($element_type)?} ) )?),* } - impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for $identifier $(<$lt>)? { - type Error = ParquetError; - - fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { - prot.read_struct_begin()?; - let field_ident = prot.read_field_begin()?; + impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for $identifier $(<$lt>)? { + fn read_thrift(prot: &mut R) -> Result { + let field_ident = prot.read_field_begin(0)?; if field_ident.field_type == FieldType::Stop { return Err(general_err!("Received empty union from remote {}", stringify!($identifier))); } @@ -213,13 +205,12 @@ macro_rules! thrift_union { return Err(general_err!("Unexpected {} {}", stringify!($identifier), field_ident.id)); } }; - let field_ident = prot.read_field_begin()?; + let field_ident = prot.read_field_begin(field_ident.id)?; if field_ident.field_type != FieldType::Stop { return Err(general_err!( concat!("Received multiple fields for union from remote {}", stringify!($identifier)) )); } - prot.read_struct_end()?; Ok(ret) } } @@ -283,27 +274,26 @@ macro_rules! thrift_struct { $($(#[cfg_attr(not(doctest), $($field_attrs)*)])* $vis $field_name: $crate::__thrift_required_or_optional!($required_or_optional $crate::__thrift_field_type!($field_type $($field_lt)? $($element_type)?))),* } - impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for $identifier $(<$lt>)? { - type Error = ParquetError; - fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { + impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for $identifier $(<$lt>)? { + fn read_thrift(prot: &mut R) -> Result { $(let mut $field_name: Option<$crate::__thrift_field_type!($field_type $($field_lt)? $($element_type)?)> = None;)* - prot.read_struct_begin()?; + let mut last_field_id = 0i16; loop { - let field_ident = prot.read_field_begin()?; + let field_ident = prot.read_field_begin(last_field_id)?; if field_ident.field_type == FieldType::Stop { break; } match field_ident.id { $($field_id => { - let val = $crate::__thrift_read_field!(prot, $field_type $($field_lt)? $($element_type)?); + let val = $crate::__thrift_read_field!(prot, field_ident, $field_type $($field_lt)? 
$($element_type)?); $field_name = Some(val); })* _ => { prot.skip(field_ident.field_type)?; } }; + last_field_id = field_ident.id; } - prot.read_struct_end()?; $($crate::__thrift_result_required_or_optional!($required_or_optional $field_name);)* Ok(Self { $($field_name),* @@ -417,39 +407,42 @@ macro_rules! __thrift_result_required_or_optional { #[doc(hidden)] #[macro_export] macro_rules! __thrift_read_field { - ($prot:tt, list $lt:lifetime binary) => { - Vec::<&'a [u8]>::try_from(&mut *$prot)? + ($prot:tt, $field_ident:tt, list $lt:lifetime binary) => { + read_thrift_vec::<&'a [u8], R>(&mut *$prot)? }; - ($prot:tt, list $lt:lifetime $element_type:ident) => { - Vec::<$element_type>::try_from(&mut *$prot)? + ($prot:tt, $field_ident:tt, list $lt:lifetime $element_type:ident) => { + read_thrift_vec::<$element_type, R>(&mut *$prot)? }; - ($prot:tt, list string) => { - Vec::::try_from(&mut *$prot)? + ($prot:tt, $field_ident:tt, list string) => { + read_thrift_vec::(&mut *$prot)? }; - ($prot:tt, list $element_type:ident) => { - Vec::<$element_type>::try_from(&mut *$prot)? + ($prot:tt, $field_ident:tt, list $element_type:ident) => { + read_thrift_vec::<$element_type, R>(&mut *$prot)? }; - ($prot:tt, string $lt:lifetime) => { - <&$lt str>::try_from(&mut *$prot)? + ($prot:tt, $field_ident:tt, string $lt:lifetime) => { + <&$lt str>::read_thrift(&mut *$prot)? }; - ($prot:tt, binary $lt:lifetime) => { - <&$lt [u8]>::try_from(&mut *$prot)? + ($prot:tt, $field_ident:tt, binary $lt:lifetime) => { + <&$lt [u8]>::read_thrift(&mut *$prot)? }; - ($prot:tt, $field_type:ident $lt:lifetime) => { - $field_type::try_from(&mut *$prot)? + ($prot:tt, $field_ident:tt, $field_type:ident $lt:lifetime) => { + $field_type::read_thrift(&mut *$prot)? }; - ($prot:tt, string) => { - String::try_from(&mut *$prot)? + ($prot:tt, $field_ident:tt, string) => { + String::read_thrift(&mut *$prot)? }; - ($prot:tt, binary) => { + ($prot:tt, $field_ident:tt, binary) => { // this one needs to not conflict with `list` $prot.read_bytes()?.to_vec() }; - ($prot:tt, double) => { - $crate::parquet_thrift::OrderedF64::try_from(&mut *$prot)? + ($prot:tt, $field_ident:tt, double) => { + $crate::parquet_thrift::OrderedF64::read_thrift(&mut *$prot)? + }; + ($prot:tt, $field_ident:tt, bool) => { + $field_ident.bool_val.unwrap() }; - ($prot:tt, $field_type:ident) => { - $field_type::try_from(&mut *$prot)? + ($prot:tt, $field_ident:tt, $field_type:ident) => { + $field_type::read_thrift(&mut *$prot)? }; } @@ -482,10 +475,10 @@ macro_rules! __thrift_union_type { #[macro_export] macro_rules! __thrift_read_variant { ($prot:tt, $field_name:ident $field_type:ident) => { - Self::$field_name($field_type::try_from(&mut *$prot)?) + Self::$field_name($field_type::read_thrift(&mut *$prot)?) }; ($prot:tt, $field_name:ident list $field_type:ident) => { - Self::$field_name(Vec::<$field_type>::try_from(&mut *$prot)?) + Self::$field_name(Vec::<$field_type>::read_thrift(&mut *$prot)?) }; ($prot:tt, $field_name:ident) => {{ $prot.skip_empty_struct()?; diff --git a/parquet/src/parquet_thrift.rs b/parquet/src/parquet_thrift.rs index 9b83c0a01b8d..17847d0b71e5 100644 --- a/parquet/src/parquet_thrift.rs +++ b/parquet/src/parquet_thrift.rs @@ -24,10 +24,9 @@ use std::{cmp::Ordering, io::Write}; use crate::errors::{ParquetError, Result}; -// Couldn't implement thrift structs with f64 do to lack of Eq -// for f64. This is a hacky workaround for now...there are other -// wrappers out there that should probably be used instead. 
-// thrift seems to re-export an impl from ordered-float +/// Wrapper for thrift `double` fields. This is used to provide +/// an implementation of `Eq` for floats. This implementation +/// uses IEEE 754 total order. #[derive(Debug, Clone, Copy, PartialEq)] pub struct OrderedF64(f64); @@ -156,53 +155,52 @@ impl TryFrom for ElementType { } } +/// Struct used to describe a [thrift struct] field during decoding. +/// +/// [thrift struct]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#struct-encoding pub(crate) struct FieldIdentifier { + /// The type for the field. pub(crate) field_type: FieldType, + /// The field's `id`. May be computed from delta or directly decoded. pub(crate) id: i16, + /// Stores the value for booleans. + /// + /// Boolean fields store no data, instead the field type is either boolean true, or + /// boolean false. + pub(crate) bool_val: Option, } +/// Struct used to describe a [thrift list]. +/// +/// [thrift list]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#list-and-set #[derive(Clone, Debug, Eq, PartialEq)] pub(crate) struct ListIdentifier { + /// The type for each element in the list. pub(crate) element_type: ElementType, + /// Number of elements contained in the list. pub(crate) size: i32, } -/// A more performant implementation of [`TCompactInputProtocol`] that reads a slice +/// Low-level object used to deserialize structs encoded with the Thrift [compact] protocol. /// -/// [`TCompactInputProtocol`]: thrift::protocol::TCompactInputProtocol -pub(crate) struct ThriftCompactInputProtocol<'a> { - buf: &'a [u8], - // Identifier of the last field deserialized for a struct. - last_read_field_id: i16, - // Stack of the last read field ids (a new entry is added each time a nested struct is read). - read_field_id_stack: Vec, - // Boolean value for a field. - // Saved because boolean fields and their value are encoded in a single byte, - // and reading the field only occurs after the field id is read. - pending_read_bool_value: Option, -} - -impl<'b, 'a: 'b> ThriftCompactInputProtocol<'a> { - pub fn new(buf: &'a [u8]) -> Self { - Self { - buf, - last_read_field_id: 0, - read_field_id_stack: Vec::with_capacity(16), - pending_read_bool_value: None, - } - } +/// Implementation of this trait must provide the low-level functions `read_byte`, `read_bytes`, +/// `skip_bytes`, and `read_double`. These primitives are used by the default functions provided +/// here to perform deserialization. +/// +/// [compact]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md +pub(crate) trait ThriftCompactInputProtocol<'a> { + /// Read a single byte from the input. + fn read_byte(&mut self) -> Result; - pub fn reset_buffer(&mut self, buf: &'a [u8]) { - self.buf = buf; - self.last_read_field_id = 0; - self.read_field_id_stack.clear(); - self.pending_read_bool_value = None; - } + /// Read a Thrift encoded [binary] from the input. + /// + /// [binary]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#binary-encoding + fn read_bytes(&mut self) -> Result<&'a [u8]>; - pub fn as_slice(&self) -> &'a [u8] { - self.buf - } + /// Skip the next `n` bytes of input. + fn skip_bytes(&mut self, n: usize) -> Result<()>; + /// Read a ULEB128 encoded unsigned varint from the input. 
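+ /// + /// Each encoded byte carries seven payload bits in its low bits; the high bit + /// is a continuation flag, so bytes are consumed until one with the high bit + /// clear is seen.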
fn read_vlq(&mut self) -> Result<u64> { let mut in_progress = 0; let mut shift = 0; @@ -216,12 +214,14 @@ impl<'b, 'a: 'b> ThriftCompactInputProtocol<'a> { } } + /// Read a zig-zag encoded signed varint from the input. fn read_zig_zag(&mut self) -> Result<i64> { let val = self.read_vlq()?; Ok((val >> 1) as i64 ^ -((val & 1) as i64)) } - fn read_list_set_begin(&mut self) -> Result<(ElementType, i32)> { + /// Read the [`ListIdentifier`] for a Thrift encoded list. + fn read_list_begin(&mut self) -> Result<ListIdentifier> { let header = self.read_byte()?; let element_type = ElementType::try_from(header & 0x0f)?; @@ -233,162 +233,118 @@ impl<'b, 'a: 'b> ThriftCompactInputProtocol<'a> { self.read_vlq()? as _ }; - Ok((element_type, element_count)) - } - - pub(crate) fn read_struct_begin(&mut self) -> Result<()> { - self.read_field_id_stack.push(self.last_read_field_id); - self.last_read_field_id = 0; - Ok(()) - } - - pub(crate) fn read_struct_end(&mut self) -> Result<()> { - self.last_read_field_id = self - .read_field_id_stack - .pop() - .expect("should have previous field ids"); - Ok(()) - } - - // This is a specialized version of read_field_begin, solely for use in parsing - // PageLocation structs in the offset index. This function assumes that the delta - // field will always be less than 0xf, fields will be in order, and no boolean fields - // will be read. This also skips validation of the field type. - // - // Returns a tuple of (field_type, field_delta) - pub(crate) fn read_field_header(&mut self) -> Result<(u8, u8)> { - let field_type = self.read_byte()?; - let field_delta = (field_type & 0xf0) >> 4; - let field_type = field_type & 0xf; - Ok((field_type, field_delta)) + Ok(ListIdentifier { + element_type, + size: element_count, + }) } - pub(crate) fn read_field_begin(&mut self) -> Result<FieldIdentifier> { + /// Read the [`FieldIdentifier`] for a field in a Thrift encoded struct. + fn read_field_begin(&mut self, last_field_id: i16) -> Result<FieldIdentifier> { // we can read at least one byte, which is: // - the type // - the field delta and the type let field_type = self.read_byte()?; let field_delta = (field_type & 0xf0) >> 4; let field_type = FieldType::try_from(field_type & 0xf)?; + let mut bool_val: Option<bool> = None; match field_type { FieldType::Stop => Ok(FieldIdentifier { field_type: FieldType::Stop, id: 0, + bool_val, }), _ => { // special handling for bools if field_type == FieldType::BooleanFalse { - self.pending_read_bool_value = Some(false); + bool_val = Some(false); } else if field_type == FieldType::BooleanTrue { - self.pending_read_bool_value = Some(true); + bool_val = Some(true); } - if field_delta != 0 { - self.last_read_field_id = self - .last_read_field_id - .checked_add(field_delta as i16) - .map_or_else( - || { - Err(general_err!(format!( - "cannot add {} to {}", - field_delta, self.last_read_field_id - ))) - }, - Ok, - )?; + let field_id = if field_delta != 0 { + last_field_id.checked_add(field_delta as i16).map_or_else( + || { + Err(general_err!(format!( + "cannot add {} to {}", + field_delta, last_field_id + ))) + }, + Ok, + )? } else { - self.last_read_field_id = self.read_i16()?; + self.read_i16()? }; Ok(FieldIdentifier { field_type, - id: self.last_read_field_id, + id: field_id, + bool_val, }) } } } - pub(crate) fn read_bool(&mut self) -> Result<bool> { - match self.pending_read_bool_value.take() { - Some(b) => Ok(b), - None => { - let b = self.read_byte()?; - // Previous versions of the thrift specification said to use 0 and 1 inside collections, - // but that differed from existing implementations.
- // The specification was updated in https://github.com/apache/thrift/commit/2c29c5665bc442e703480bb0ee60fe925ffe02e8. - // At least the go implementation seems to have followed the previously documented values. - match b { - 0x01 => Ok(true), - 0x00 | 0x02 => Ok(false), - unkn => Err(general_err!(format!("cannot convert {unkn} into bool"))), - } - } - } + /// This is a specialized version of [`Self::read_field_begin`], solely for use in parsing + /// simple structs. This function assumes that the delta field will always be less than 0xf, + /// fields will be in order, and no boolean fields will be read. + /// This also skips validation of the field type. + /// + /// Returns a tuple of `(field_type, field_delta)`. + fn read_field_header(&mut self) -> Result<(u8, u8)> { + let field_type = self.read_byte()?; + let field_delta = (field_type & 0xf0) >> 4; + let field_type = field_type & 0xf; + Ok((field_type, field_delta)) } - pub(crate) fn read_bytes(&mut self) -> Result<&'b [u8]> { - let len = self.read_vlq()? as usize; - let ret = self.buf.get(..len).ok_or_else(eof_error)?; - self.buf = &self.buf[len..]; - Ok(ret) + /// Read a boolean list element. This should not be used for struct fields. For the latter, + /// use the [`FieldIdentifier::bool_val`] field. + fn read_bool(&mut self) -> Result { + let b = self.read_byte()?; + // Previous versions of the thrift specification said to use 0 and 1 inside collections, + // but that differed from existing implementations. + // The specification was updated in https://github.com/apache/thrift/commit/2c29c5665bc442e703480bb0ee60fe925ffe02e8. + // At least the go implementation seems to have followed the previously documented values. + match b { + 0x01 => Ok(true), + 0x00 | 0x02 => Ok(false), + unkn => Err(general_err!(format!("cannot convert {unkn} into bool"))), + } } - pub(crate) fn read_string(&mut self) -> Result<&'b str> { + /// Read a Thrift [binary] as a UTF-8 encoded string. + /// + /// [binary]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#binary-encoding + fn read_string(&mut self) -> Result<&'a str> { let slice = self.read_bytes()?; Ok(std::str::from_utf8(slice)?) } - pub(crate) fn read_i8(&mut self) -> Result { + /// Read an `i8`. + fn read_i8(&mut self) -> Result { Ok(self.read_byte()? as _) } - pub(crate) fn read_i16(&mut self) -> Result { + /// Read an `i16`. + fn read_i16(&mut self) -> Result { Ok(self.read_zig_zag()? as _) } - pub(crate) fn read_i32(&mut self) -> Result { + /// Read an `i32`. + fn read_i32(&mut self) -> Result { Ok(self.read_zig_zag()? as _) } - pub(crate) fn read_i64(&mut self) -> Result { + /// Read an `i64`. 
+ fn read_i64(&mut self) -> Result<i64> { self.read_zig_zag() } - pub(crate) fn read_double(&mut self) -> Result<f64> { - let slice = self.buf.get(..8).ok_or_else(eof_error)?; - self.buf = &self.buf[8..]; - match slice.try_into() { - Ok(slice) => Ok(f64::from_le_bytes(slice)), - Err(_) => Err(general_err!("Unexpected error converting slice")), - } - } - - pub(crate) fn read_list_begin(&mut self) -> Result<ListIdentifier> { - let (element_type, element_count) = self.read_list_set_begin()?; - Ok(ListIdentifier { - element_type, - size: element_count, - }) - } - - pub(crate) fn read_list_end(&mut self) -> Result<()> { - Ok(()) - } - - #[inline] - fn read_byte(&mut self) -> Result<u8> { - let ret = *self.buf.first().ok_or_else(eof_error)?; - self.buf = &self.buf[1..]; - Ok(ret) - } - - #[inline] - fn skip_bytes(&mut self, n: usize) -> Result<()> { - self.buf.get(..n).ok_or_else(eof_error)?; - self.buf = &self.buf[n..]; - Ok(()) - } + /// Read a Thrift `double` as `f64`. + fn read_double(&mut self) -> Result<f64>; + /// Skip a ULEB128 encoded varint. fn skip_vlq(&mut self) -> Result<()> { loop { let byte = self.read_byte()?; @@ -398,21 +354,25 @@ impl<'b, 'a: 'b> ThriftCompactInputProtocol<'a> { } } + /// Skip a thrift [binary]. + /// + /// [binary]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#binary-encoding fn skip_binary(&mut self) -> Result<()> { let len = self.read_vlq()? as usize; self.skip_bytes(len) } /// Skip a field with type `field_type` recursively until the default - /// maximum skip depth is reached. - pub(crate) fn skip(&mut self, field_type: FieldType) -> Result<()> { - // TODO: magic number - self.skip_till_depth(field_type, 64) + /// maximum skip depth (currently 64) is reached. + fn skip(&mut self, field_type: FieldType) -> Result<()> { + const DEFAULT_SKIP_DEPTH: i8 = 64; + self.skip_till_depth(field_type, DEFAULT_SKIP_DEPTH) } /// Empty structs in unions consist of a single byte of 0 for the field stop record. - /// This skips that byte without pushing to the field id stack. - pub(crate) fn skip_empty_struct(&mut self) -> Result<()> { + /// This skips that byte without incurring the cost of processing the [`FieldIdentifier`]. + /// Will return an error if the struct is not actually empty.
+ fn skip_empty_struct(&mut self) -> Result<()> { let b = self.read_byte()?; if b != 0 { Err(general_err!("Empty struct has fields")) @@ -428,7 +388,8 @@ impl<'b, 'a: 'b> ThriftCompactInputProtocol<'a> { } match field_type { - FieldType::BooleanFalse | FieldType::BooleanTrue => self.read_bool().map(|_| ()), + // boolean field has no data + FieldType::BooleanFalse | FieldType::BooleanTrue => Ok(()), FieldType::Byte => self.read_i8().map(|_| ()), FieldType::I16 => self.skip_vlq().map(|_| ()), FieldType::I32 => self.skip_vlq().map(|_| ()), @@ -436,15 +397,16 @@ impl<'b, 'a: 'b> ThriftCompactInputProtocol<'a> { FieldType::Double => self.skip_bytes(8).map(|_| ()), FieldType::Binary => self.skip_binary().map(|_| ()), FieldType::Struct => { - self.read_struct_begin()?; + let mut last_field_id = 0i16; loop { - let field_ident = self.read_field_begin()?; + let field_ident = self.read_field_begin(last_field_id)?; if field_ident.field_type == FieldType::Stop { break; } self.skip_till_depth(field_ident.field_type, depth - 1)?; + last_field_id = field_ident.id; } - self.read_struct_end() + Ok(()) } FieldType::List => { let list_ident = self.read_list_begin()?; @@ -452,7 +414,7 @@ impl<'b, 'a: 'b> ThriftCompactInputProtocol<'a> { let element_type = FieldType::try_from(list_ident.element_type)?; self.skip_till_depth(element_type, depth - 1)?; } - self.read_list_end() + Ok(()) } // no list or map types in parquet format u => Err(general_err!(format!("cannot skip field type {:?}", &u))), @@ -460,90 +422,142 @@ impl<'b, 'a: 'b> ThriftCompactInputProtocol<'a> { } } +/// A high performance Thrift reader that reads from a slice of bytes. +pub(crate) struct ThriftSliceInputProtocol<'a> { + buf: &'a [u8], +} + +impl<'a> ThriftSliceInputProtocol<'a> { + /// Create a new `ThriftSliceInputProtocol` using the bytes in `buf`. + pub fn new(buf: &'a [u8]) -> Self { + Self { buf } + } + + /// Re-initialize this reader with a new slice. + pub fn reset_buffer(&mut self, buf: &'a [u8]) { + self.buf = buf; + } + + /// Return the current buffer as a slice. + pub fn as_slice(&self) -> &'a [u8] { + self.buf + } +} + +impl<'b, 'a: 'b> ThriftCompactInputProtocol<'b> for ThriftSliceInputProtocol<'a> { + #[inline] + fn read_byte(&mut self) -> Result { + let ret = *self.buf.first().ok_or_else(eof_error)?; + self.buf = &self.buf[1..]; + Ok(ret) + } + + fn read_bytes(&mut self) -> Result<&'b [u8]> { + let len = self.read_vlq()? as usize; + let ret = self.buf.get(..len).ok_or_else(eof_error)?; + self.buf = &self.buf[len..]; + Ok(ret) + } + + #[inline] + fn skip_bytes(&mut self, n: usize) -> Result<()> { + self.buf.get(..n).ok_or_else(eof_error)?; + self.buf = &self.buf[n..]; + Ok(()) + } + + fn read_double(&mut self) -> Result { + let slice = self.buf.get(..8).ok_or_else(eof_error)?; + self.buf = &self.buf[8..]; + match slice.try_into() { + Ok(slice) => Ok(f64::from_le_bytes(slice)), + Err(_) => Err(general_err!("Unexpected error converting slice")), + } + } +} + fn eof_error() -> ParquetError { eof_err!("Unexpected EOF") } -impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for bool { - type Error = ParquetError; - fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result { +/// Trait implemented for objects that can be deserialized from a Thrift input stream. +/// Implementations are provided for Thrift primitive types. +pub(crate) trait ReadThrift<'a, R: ThriftCompactInputProtocol<'a>> { + /// Read an object of type `Self` from the input protocol object. 
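+ /// + /// Implementations may borrow from the input; the `&'a str` and `&'a [u8]` + /// implementations return slices of the underlying buffer rather than copying.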
+ fn read_thrift(prot: &mut R) -> Result<Self> + where + Self: Sized; +} + +impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for bool { + fn read_thrift(prot: &mut R) -> Result<Self> { prot.read_bool() } } -impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for i8 { - type Error = ParquetError; - fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> { +impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for i8 { + fn read_thrift(prot: &mut R) -> Result<Self> { prot.read_i8() } } -impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for i16 { - type Error = ParquetError; - fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> { +impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for i16 { + fn read_thrift(prot: &mut R) -> Result<Self> { prot.read_i16() } } -impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for i32 { - type Error = ParquetError; - fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> { +impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for i32 { + fn read_thrift(prot: &mut R) -> Result<Self> { prot.read_i32() } } -impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for i64 { - type Error = ParquetError; - fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> { +impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for i64 { + fn read_thrift(prot: &mut R) -> Result<Self> { prot.read_i64() } } -impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for OrderedF64 { - type Error = ParquetError; - fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> { +impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for OrderedF64 { + fn read_thrift(prot: &mut R) -> Result<Self> { Ok(OrderedF64(prot.read_double()?)) } } -impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for &'a str { - type Error = ParquetError; - fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> { +impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for &'a str { + fn read_thrift(prot: &mut R) -> Result<Self> { prot.read_string() } } -impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for String { - type Error = ParquetError; - fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> { +impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for String { + fn read_thrift(prot: &mut R) -> Result<Self> { Ok(prot.read_string()?.to_owned()) } } -impl<'a> TryFrom<&mut ThriftCompactInputProtocol<'a>> for &'a [u8] { - type Error = ParquetError; - fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> { +impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for &'a [u8] { + fn read_thrift(prot: &mut R) -> Result<Self> { prot.read_bytes() } } -impl<'a, T> TryFrom<&mut ThriftCompactInputProtocol<'a>> for Vec<T> +/// Read a Thrift encoded [list] from the input protocol object.
+/// +/// [list]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#list-and-set +pub(crate) fn read_thrift_vec<'a, T, R>(prot: &mut R) -> Result<Vec<T>> where - T: for<'b> TryFrom<&'b mut ThriftCompactInputProtocol<'a>>, - ParquetError: for<'b> From<<T as TryFrom<&'b mut ThriftCompactInputProtocol<'a>>>::Error>, + R: ThriftCompactInputProtocol<'a>, + T: ReadThrift<'a, R>, { - type Error = ParquetError; - - fn try_from(prot: &mut ThriftCompactInputProtocol<'a>) -> Result<Self> { - let list_ident = prot.read_list_begin()?; - let mut res = Vec::with_capacity(list_ident.size as usize); - for _ in 0..list_ident.size { - let val = T::try_from(prot)?; - res.push(val); - } - - Ok(res) + let list_ident = prot.read_list_begin()?; + let mut res = Vec::with_capacity(list_ident.size as usize); + for _ in 0..list_ident.size { + let val = T::read_thrift(prot)?; + res.push(val); } + Ok(res) } ///////////////////////// @@ -983,11 +997,7 @@ pub(crate) mod tests { pub(crate) fn test_roundtrip<T>(val: T) where - T: for<'a> TryFrom<&'a mut ThriftCompactInputProtocol<'a>> - + WriteThrift - + PartialEq - + Debug, - for<'a> <T as TryFrom<&'a mut ThriftCompactInputProtocol<'a>>>::Error: Debug, + T: for<'a> ReadThrift<'a, ThriftSliceInputProtocol<'a>> + WriteThrift + PartialEq + Debug, { let buf = Vec::<u8>::new(); let mut writer = ThriftCompactOutputProtocol::new(buf); @@ -995,8 +1005,8 @@ pub(crate) mod tests { //println!("serialized: {:x?}", writer.inner()); - let mut prot = ThriftCompactInputProtocol::new(writer.inner()); - let read_val = T::try_from(&mut prot).unwrap(); + let mut prot = ThriftSliceInputProtocol::new(writer.inner()); + let read_val = T::read_thrift(&mut prot).unwrap(); assert_eq!(val, read_val); } From 3dbd42e516ffea475b16033c05d9492a6d2d849e Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Tue, 23 Sep 2025 11:22:07 -0700 Subject: [PATCH 11/15] [thrift-remodel] Use new Thrift encoder/decoder for Parquet page headers (#8376) # Which issue does this PR close? **Note: this targets a feature branch, not main** - Part of #5854. # Rationale for this change This continues the remodel, moving on to `PageHeader` support. # What changes are included in this PR? Swaps out old `format` page header structs for new ones. This also adds a `Read` based implementation of the thrift compact protocol reader (the sizes of the Thrift encoded page headers are not knowable in advance, so we need a way to read them from the thrift input stream used by the page decoder). This PR also makes decoding of the `Statistics` in the page header optional (defaults to `false`). We do not use them, and decoding them takes a good chunk of time. # Are these changes tested? These changes should be covered by existing tests # Are there any user-facing changes?
Yes, page level stats are no longer decoded by default --- parquet/benches/metadata.rs | 20 + parquet/src/arrow/arrow_writer/mod.rs | 27 +- parquet/src/column/page.rs | 40 +- parquet/src/column/page_encryption.rs | 8 +- .../src/column/page_encryption_disabled.rs | 2 +- parquet/src/column/writer/mod.rs | 1 + parquet/src/encryption/encrypt.rs | 28 ++ parquet/src/errors.rs | 8 + parquet/src/file/metadata/thrift_gen.rs | 443 ++++++++++++++---- parquet/src/file/page_encoding_stats.rs | 2 +- parquet/src/file/properties.rs | 24 + parquet/src/file/serialized_reader.rs | 67 ++- parquet/src/file/statistics.rs | 201 ++++++++ parquet/src/file/writer.rs | 11 +- parquet/src/parquet_macros.rs | 76 ++- parquet/src/parquet_thrift.rs | 61 ++- parquet/src/thrift.rs | 15 +- parquet/tests/arrow_reader/bad_data.rs | 2 +- .../tests/encryption/encryption_agnostic.rs | 4 +- 19 files changed, 861 insertions(+), 179 deletions(-) diff --git a/parquet/benches/metadata.rs b/parquet/benches/metadata.rs index 8c886e4d5eea..ced0175da878 100644 --- a/parquet/benches/metadata.rs +++ b/parquet/benches/metadata.rs @@ -258,6 +258,26 @@ fn criterion_benchmark(c: &mut Criterion) { }); }) }); + + #[cfg(feature = "arrow")] + c.bench_function("page headers (no stats)", |b| { + b.iter(|| { + metadata.row_groups.iter().for_each(|rg| { + rg.columns.iter().for_each(|col| { + if let Some(col_meta) = &col.meta_data { + if let Some(dict_offset) = col_meta.dictionary_page_offset { + parquet::thrift::bench_page_header_no_stats( + &file_bytes.slice(dict_offset as usize..), + ); + } + parquet::thrift::bench_page_header_no_stats( + &file_bytes.slice(col_meta.data_page_offset as usize..), + ); + } + }); + }); + }) + }); } criterion_group!(benches, criterion_benchmark); diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs index 5ccab5b24bcd..e863b74a9581 100644 --- a/parquet/src/arrow/arrow_writer/mod.rs +++ b/parquet/src/arrow/arrow_writer/mod.rs @@ -23,7 +23,6 @@ use std::iter::Peekable; use std::slice::Iter; use std::sync::{Arc, Mutex}; use std::vec::IntoIter; -use thrift::protocol::TCompactOutputProtocol; use arrow_array::cast::AsArray; use arrow_array::types::*; @@ -48,8 +47,8 @@ use crate::file::metadata::{KeyValue, RowGroupMetaData}; use crate::file::properties::{WriterProperties, WriterPropertiesPtr}; use crate::file::reader::{ChunkReader, Length}; use crate::file::writer::{SerializedFileWriter, SerializedRowGroupWriter}; +use crate::parquet_thrift::{ThriftCompactOutputProtocol, WriteThrift}; use crate::schema::types::{ColumnDescPtr, SchemaDescriptor}; -use crate::thrift::TSerializable; use levels::{calculate_array_levels, ArrayLevels}; mod byte_array; @@ -583,8 +582,8 @@ impl PageWriter for ArrowPageWriter { } } None => { - let mut protocol = TCompactOutputProtocol::new(&mut header); - page_header.write_to_out_protocol(&mut protocol)?; + let mut protocol = ThriftCompactOutputProtocol::new(&mut header); + page_header.write_thrift(&mut protocol)?; } }; @@ -1487,12 +1486,12 @@ mod tests { use crate::arrow::arrow_reader::{ParquetRecordBatchReader, ParquetRecordBatchReaderBuilder}; use crate::arrow::ARROW_SCHEMA_META_KEY; use crate::column::page::{Page, PageReader}; + use crate::file::metadata::thrift_gen::PageHeader; use crate::file::page_encoding_stats::PageEncodingStats; use crate::file::page_index::column_index::ColumnIndexMetaData; use crate::file::reader::SerializedPageReader; - use crate::format::PageHeader; + use crate::parquet_thrift::{ReadThrift, ThriftSliceInputProtocol}; use 
crate::schema::types::ColumnPath; - use crate::thrift::TCompactSliceInputProtocol; use arrow::datatypes::ToByteSlice; use arrow::datatypes::{DataType, Schema}; use arrow::error::Result as ArrowResult; @@ -4191,8 +4190,8 @@ mod tests { // decode first page header let first_page = &buf[4..]; - let mut prot = TCompactSliceInputProtocol::new(first_page); - let hdr = PageHeader::read_from_in_protocol(&mut prot).unwrap(); + let mut prot = ThriftSliceInputProtocol::new(first_page); + let hdr = PageHeader::read_thrift(&mut prot).unwrap(); let stats = hdr.data_page_header.unwrap().statistics; assert!(stats.is_none()); @@ -4225,8 +4224,8 @@ mod tests { // decode first page header let first_page = &buf[4..]; - let mut prot = TCompactSliceInputProtocol::new(first_page); - let hdr = PageHeader::read_from_in_protocol(&mut prot).unwrap(); + let mut prot = ThriftSliceInputProtocol::new(first_page); + let hdr = PageHeader::read_thrift(&mut prot).unwrap(); let stats = hdr.data_page_header.unwrap().statistics; let stats = stats.unwrap(); @@ -4277,8 +4276,8 @@ mod tests { // decode first page header let first_page = &buf[4..]; - let mut prot = TCompactSliceInputProtocol::new(first_page); - let hdr = PageHeader::read_from_in_protocol(&mut prot).unwrap(); + let mut prot = ThriftSliceInputProtocol::new(first_page); + let hdr = PageHeader::read_thrift(&mut prot).unwrap(); let stats = hdr.data_page_header.unwrap().statistics; assert!(stats.is_some()); let stats = stats.unwrap(); @@ -4290,8 +4289,8 @@ mod tests { // check second page now let second_page = &prot.as_slice()[hdr.compressed_page_size as usize..]; - let mut prot = TCompactSliceInputProtocol::new(second_page); - let hdr = PageHeader::read_from_in_protocol(&mut prot).unwrap(); + let mut prot = ThriftSliceInputProtocol::new(second_page); + let hdr = PageHeader::read_thrift(&mut prot).unwrap(); let stats = hdr.data_page_header.unwrap().statistics; assert!(stats.is_some()); let stats = stats.unwrap(); diff --git a/parquet/src/column/page.rs b/parquet/src/column/page.rs index a2f683d71f4e..23bf4548fbb4 100644 --- a/parquet/src/column/page.rs +++ b/parquet/src/column/page.rs @@ -21,8 +21,10 @@ use bytes::Bytes; use crate::basic::{Encoding, PageType}; use crate::errors::{ParquetError, Result}; -use crate::file::statistics::Statistics; -use crate::format::PageHeader; +use crate::file::metadata::thrift_gen::{ + DataPageHeader, DataPageHeaderV2, DictionaryPageHeader, PageHeader, +}; +use crate::file::statistics::{page_stats_to_thrift, Statistics}; /// Parquet Page definition. /// @@ -216,7 +218,7 @@ impl CompressedPage { let page_type = self.page_type(); let mut page_header = PageHeader { - type_: page_type.into(), + type_: page_type, uncompressed_page_size: uncompressed_size as i32, compressed_page_size: compressed_size as i32, // TODO: Add support for crc checksum @@ -234,12 +236,12 @@ impl CompressedPage { ref statistics, .. } => { - let data_page_header = crate::format::DataPageHeader { + let data_page_header = DataPageHeader { num_values: num_values as i32, - encoding: encoding.into(), - definition_level_encoding: def_level_encoding.into(), - repetition_level_encoding: rep_level_encoding.into(), - statistics: crate::file::statistics::to_thrift(statistics.as_ref()), + encoding, + definition_level_encoding: def_level_encoding, + repetition_level_encoding: rep_level_encoding, + statistics: page_stats_to_thrift(statistics.as_ref()), }; page_header.data_page_header = Some(data_page_header); } @@ -252,22 +254,22 @@ impl CompressedPage { ref statistics, .. 
} => { - let data_page_header_v2 = crate::format::DataPageHeaderV2 { + let data_page_header_v2 = DataPageHeaderV2 { num_values: num_values as i32, num_nulls: num_nulls as i32, num_rows: num_rows as i32, - encoding: encoding.into(), + encoding, definition_levels_byte_length: def_levels_byte_len as i32, repetition_levels_byte_length: rep_levels_byte_len as i32, is_compressed: Some(is_compressed), - statistics: crate::file::statistics::to_thrift(statistics.as_ref()), + statistics: page_stats_to_thrift(statistics.as_ref()), }; page_header.data_page_header_v2 = Some(data_page_header_v2); } Page::DictionaryPage { is_sorted, .. } => { - let dictionary_page_header = crate::format::DictionaryPageHeader { + let dictionary_page_header = DictionaryPageHeader { num_values: num_values as i32, - encoding: encoding.into(), + encoding, is_sorted: Some(is_sorted), }; page_header.dictionary_page_header = Some(dictionary_page_header); @@ -343,12 +345,14 @@ pub struct PageMetadata { pub is_dict: bool, } -impl TryFrom<&PageHeader> for PageMetadata { +impl TryFrom<&crate::file::metadata::thrift_gen::PageHeader> for PageMetadata { type Error = ParquetError; - fn try_from(value: &PageHeader) -> std::result::Result { + fn try_from( + value: &crate::file::metadata::thrift_gen::PageHeader, + ) -> std::result::Result { match value.type_ { - crate::format::PageType::DATA_PAGE => { + PageType::DATA_PAGE => { let header = value.data_page_header.as_ref().unwrap(); Ok(PageMetadata { num_rows: None, @@ -356,12 +360,12 @@ impl TryFrom<&PageHeader> for PageMetadata { is_dict: false, }) } - crate::format::PageType::DICTIONARY_PAGE => Ok(PageMetadata { + PageType::DICTIONARY_PAGE => Ok(PageMetadata { num_rows: None, num_levels: None, is_dict: true, }), - crate::format::PageType::DATA_PAGE_V2 => { + PageType::DATA_PAGE_V2 => { let header = value.data_page_header_v2.as_ref().unwrap(); Ok(PageMetadata { num_rows: Some(header.num_rows as _), diff --git a/parquet/src/column/page_encryption.rs b/parquet/src/column/page_encryption.rs index 0fb7c8942675..7ee367a289c8 100644 --- a/parquet/src/column/page_encryption.rs +++ b/parquet/src/column/page_encryption.rs @@ -15,14 +15,14 @@ // specific language governing permissions and limitations // under the License. 
+use crate::basic::PageType; use crate::column::page::CompressedPage; use crate::encryption::ciphers::BlockEncryptor; -use crate::encryption::encrypt::{encrypt_object, FileEncryptor}; +use crate::encryption::encrypt::{encrypt_thrift_object, FileEncryptor}; use crate::encryption::modules::{create_module_aad, ModuleType}; use crate::errors::ParquetError; use crate::errors::Result; -use crate::format::PageHeader; -use crate::format::PageType; +use crate::file::metadata::thrift_gen::PageHeader; use bytes::Bytes; use std::io::Write; use std::sync::Arc; @@ -114,6 +114,6 @@ impl PageEncryptor { Some(self.page_index), )?; - encrypt_object(page_header, &mut self.block_encryptor, sink, &aad) + encrypt_thrift_object(page_header, &mut self.block_encryptor, sink, &aad) } } diff --git a/parquet/src/column/page_encryption_disabled.rs b/parquet/src/column/page_encryption_disabled.rs index e85b0281168a..347024f7f21f 100644 --- a/parquet/src/column/page_encryption_disabled.rs +++ b/parquet/src/column/page_encryption_disabled.rs @@ -17,7 +17,7 @@ use crate::column::page::CompressedPage; use crate::errors::Result; -use crate::format::PageHeader; +use crate::file::metadata::thrift_gen::PageHeader; use std::io::Write; #[derive(Debug)] diff --git a/parquet/src/column/writer/mod.rs b/parquet/src/column/writer/mod.rs index e5a9139fb7ea..059287011b17 100644 --- a/parquet/src/column/writer/mod.rs +++ b/parquet/src/column/writer/mod.rs @@ -2264,6 +2264,7 @@ mod tests { let props = ReaderProperties::builder() .set_backward_compatible_lz4(false) + .set_read_page_statistics(true) .build(); let reader = SerializedPageReader::new_with_properties( Arc::new(Bytes::from(buf)), diff --git a/parquet/src/encryption/encrypt.rs b/parquet/src/encryption/encrypt.rs index 1a241bf7b170..97893021699e 100644 --- a/parquet/src/encryption/encrypt.rs +++ b/parquet/src/encryption/encrypt.rs @@ -22,6 +22,7 @@ use crate::encryption::ciphers::{ }; use crate::errors::{ParquetError, Result}; use crate::file::column_crypto_metadata::{ColumnCryptoMetaData, EncryptionWithColumnKey}; +use crate::parquet_thrift::{ThriftCompactOutputProtocol, WriteThrift}; use crate::schema::types::{ColumnDescPtr, SchemaDescriptor}; use crate::thrift::TSerializable; use ring::rand::{SecureRandom, SystemRandom}; @@ -376,6 +377,18 @@ pub(crate) fn encrypt_object( Ok(()) } +/// Write an encrypted Thrift serializable object +pub(crate) fn encrypt_thrift_object( + object: &T, + encryptor: &mut Box, + sink: &mut W, + module_aad: &[u8], +) -> Result<()> { + let encrypted_buffer = encrypt_thrift_object_to_vec(object, encryptor, module_aad)?; + sink.write_all(&encrypted_buffer)?; + Ok(()) +} + pub(crate) fn write_signed_plaintext_object( object: &T, encryptor: &mut Box, @@ -414,6 +427,21 @@ pub(crate) fn encrypt_object_to_vec( encryptor.encrypt(buffer.as_ref(), module_aad) } +/// Encrypt a Thrift serializable object to a byte vector +pub(crate) fn encrypt_thrift_object_to_vec( + object: &T, + encryptor: &mut Box, + module_aad: &[u8], +) -> Result> { + let mut buffer: Vec = vec![]; + { + let mut unencrypted_protocol = ThriftCompactOutputProtocol::new(&mut buffer); + object.write_thrift(&mut unencrypted_protocol)?; + } + + encryptor.encrypt(buffer.as_ref(), module_aad) +} + /// Get the crypto metadata for a column from the file encryption properties pub(crate) fn get_column_crypto_metadata( properties: &FileEncryptionProperties, diff --git a/parquet/src/errors.rs b/parquet/src/errors.rs index be08245e956c..dab444a28f4f 100644 --- a/parquet/src/errors.rs +++ 
b/parquet/src/errors.rs @@ -19,6 +19,7 @@ use core::num::TryFromIntError; use std::error::Error; +use std::string::FromUtf8Error; use std::{cell, io, result, str}; #[cfg(feature = "arrow")] @@ -124,6 +125,13 @@ impl From for ParquetError { ParquetError::External(Box::new(e)) } } + +impl From for ParquetError { + fn from(e: FromUtf8Error) -> ParquetError { + ParquetError::External(Box::new(e)) + } +} + #[cfg(feature = "arrow")] impl From for ParquetError { fn from(e: ArrowError) -> ParquetError { diff --git a/parquet/src/file/metadata/thrift_gen.rs b/parquet/src/file/metadata/thrift_gen.rs index b656bacc8c7d..7515a70a63f1 100644 --- a/parquet/src/file/metadata/thrift_gen.rs +++ b/parquet/src/file/metadata/thrift_gen.rs @@ -67,103 +67,6 @@ pub(crate) struct SchemaElement<'a> { } ); -thrift_struct!( -pub(crate) struct DataPageHeader { - /// Number of values, including NULLs, in this data page. - /// - /// If a OffsetIndex is present, a page must begin at a row - /// boundary (repetition_level = 0). Otherwise, pages may begin - /// within a row (repetition_level > 0). - 1: required i32 num_values - - /// Encoding used for this data page - 2: required Encoding encoding - - /// Encoding used for definition levels - 3: required Encoding definition_level_encoding; - - /// Encoding used for repetition levels - 4: required Encoding repetition_level_encoding; - - // Optional statistics for the data in this page - // page stats are pretty useless...lets ignore them - //5: optional Statistics statistics; -} -); - -thrift_struct!( - pub(crate) struct IndexPageHeader {} -); - -thrift_struct!( -pub(crate) struct DictionaryPageHeader { - /// Number of values in the dictionary - 1: required i32 num_values; - - /// Encoding using this dictionary page - 2: required Encoding encoding - - /// If true, the entries in the dictionary are sorted in ascending order - 3: optional bool is_sorted; -} -); - -thrift_struct!( -pub(crate) struct DataPageHeaderV2 { - /// Number of values, including NULLs, in this data page. - 1: required i32 num_values - /// Number of NULL values, in this data page. - /// Number of non-null = num_values - num_nulls which is also the number of values in the data section - 2: required i32 num_nulls - /// Number of rows in this data page. Every page must begin at a - /// row boundary (repetition_level = 0): rows must **not** be - /// split across page boundaries when using V2 data pages. - 3: required i32 num_rows - /// Encoding used for data in this page - 4: required Encoding encoding - - // repetition levels and definition levels are always using RLE (without size in it) - - /// Length of the definition levels - 5: required i32 definition_levels_byte_length; - /// Length of the repetition levels - 6: required i32 repetition_levels_byte_length; - - /// Whether the values are compressed. - /// Which means the section of the page between - /// definition_levels_byte_length + repetition_levels_byte_length + 1 and compressed_page_size (included) - /// is compressed with the compression_codec. 
- /// If missing it is considered compressed - 7: optional bool is_compressed = true; - - // Optional statistics for the data in this page - //8: optional Statistics statistics; -} -); - -thrift_struct!( -#[allow(dead_code)] -pub(crate) struct PageHeader { - /// the type of the page: indicates which of the *_header fields is set - 1: required PageType type_ - - /// Uncompressed page size in bytes (not including this header) - 2: required i32 uncompressed_page_size - - /// Compressed (and potentially encrypted) page size in bytes, not including this header - 3: required i32 compressed_page_size - - /// The 32-bit CRC checksum for the page, to be be calculated as follows: - 4: optional i32 crc - - // Headers for page specific data. One only will be set. - 5: optional DataPageHeader data_page_header; - 6: optional IndexPageHeader index_page_header; - 7: optional DictionaryPageHeader dictionary_page_header; - 8: optional DataPageHeaderV2 data_page_header_v2; -} -); - thrift_struct!( pub(crate) struct AesGcmV1<'a> { /// AAD prefix @@ -214,6 +117,13 @@ pub(crate) struct FileCryptoMetaData<'a> { } ); +// expose for benchmarking +pub(crate) fn bench_file_metadata(bytes: &bytes::Bytes) { + use crate::parquet_thrift::{ReadThrift, ThriftSliceInputProtocol}; + let mut prot = ThriftSliceInputProtocol::new(bytes); + crate::file::metadata::thrift_gen::FileMetaData::read_thrift(&mut prot).unwrap(); +} + // the following are only used internally so are private thrift_struct!( struct FileMetaData<'a> { @@ -909,6 +819,345 @@ impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for ParquetMetaDat } } +thrift_struct!( + pub(crate) struct IndexPageHeader {} +); + +thrift_struct!( +pub(crate) struct DictionaryPageHeader { + /// Number of values in the dictionary + 1: required i32 num_values; + + /// Encoding using this dictionary page + 2: required Encoding encoding + + /// If true, the entries in the dictionary are sorted in ascending order + 3: optional bool is_sorted; +} +); + +thrift_struct!( +/// Statistics for the page header. +/// +/// This is a duplicate of the [`Statistics`] struct above. Because the page reader uses +/// the [`Read`] API, we cannot read the min/max values as slices. This should not be +/// a huge problem since this crate no longer reads the page header statistics by default. 
+/// +/// [`Read`]: crate::parquet_thrift::ThriftReadInputProtocol +pub(crate) struct PageStatistics { + 1: optional binary max; + 2: optional binary min; + 3: optional i64 null_count; + 4: optional i64 distinct_count; + 5: optional binary max_value; + 6: optional binary min_value; + 7: optional bool is_max_value_exact; + 8: optional bool is_min_value_exact; +} +); + +thrift_struct!( +pub(crate) struct DataPageHeader { + 1: required i32 num_values + 2: required Encoding encoding + 3: required Encoding definition_level_encoding; + 4: required Encoding repetition_level_encoding; + 5: optional PageStatistics statistics; +} +); + +impl DataPageHeader { + // reader that skips decoding page statistics + fn read_thrift_without_stats<'a, R>(prot: &mut R) -> Result + where + R: ThriftCompactInputProtocol<'a>, + { + let mut num_values: Option = None; + let mut encoding: Option = None; + let mut definition_level_encoding: Option = None; + let mut repetition_level_encoding: Option = None; + let statistics: Option = None; + let mut last_field_id = 0i16; + loop { + let field_ident = prot.read_field_begin(last_field_id)?; + if field_ident.field_type == FieldType::Stop { + break; + } + match field_ident.id { + 1 => { + let val = i32::read_thrift(&mut *prot)?; + num_values = Some(val); + } + 2 => { + let val = Encoding::read_thrift(&mut *prot)?; + encoding = Some(val); + } + 3 => { + let val = Encoding::read_thrift(&mut *prot)?; + definition_level_encoding = Some(val); + } + 4 => { + let val = Encoding::read_thrift(&mut *prot)?; + repetition_level_encoding = Some(val); + } + _ => { + prot.skip(field_ident.field_type)?; + } + }; + last_field_id = field_ident.id; + } + let Some(num_values) = num_values else { + return Err(ParquetError::General( + "Required field num_values is missing".to_owned(), + )); + }; + let Some(encoding) = encoding else { + return Err(ParquetError::General( + "Required field encoding is missing".to_owned(), + )); + }; + let Some(definition_level_encoding) = definition_level_encoding else { + return Err(ParquetError::General( + "Required field definition_level_encoding is missing".to_owned(), + )); + }; + let Some(repetition_level_encoding) = repetition_level_encoding else { + return Err(ParquetError::General( + "Required field repetition_level_encoding is missing".to_owned(), + )); + }; + Ok(Self { + num_values, + encoding, + definition_level_encoding, + repetition_level_encoding, + statistics, + }) + } +} + +thrift_struct!( +pub(crate) struct DataPageHeaderV2 { + 1: required i32 num_values + 2: required i32 num_nulls + 3: required i32 num_rows + 4: required Encoding encoding + 5: required i32 definition_levels_byte_length; + 6: required i32 repetition_levels_byte_length; + 7: optional bool is_compressed = true; + 8: optional PageStatistics statistics; +} +); + +impl DataPageHeaderV2 { + // reader that skips decoding page statistics + fn read_thrift_without_stats<'a, R>(prot: &mut R) -> Result + where + R: ThriftCompactInputProtocol<'a>, + { + let mut num_values: Option = None; + let mut num_nulls: Option = None; + let mut num_rows: Option = None; + let mut encoding: Option = None; + let mut definition_levels_byte_length: Option = None; + let mut repetition_levels_byte_length: Option = None; + let mut is_compressed: Option = None; + let statistics: Option = None; + let mut last_field_id = 0i16; + loop { + let field_ident = prot.read_field_begin(last_field_id)?; + if field_ident.field_type == FieldType::Stop { + break; + } + match field_ident.id { + 1 => { + let val = 
i32::read_thrift(&mut *prot)?; + num_values = Some(val); + } + 2 => { + let val = i32::read_thrift(&mut *prot)?; + num_nulls = Some(val); + } + 3 => { + let val = i32::read_thrift(&mut *prot)?; + num_rows = Some(val); + } + 4 => { + let val = Encoding::read_thrift(&mut *prot)?; + encoding = Some(val); + } + 5 => { + let val = i32::read_thrift(&mut *prot)?; + definition_levels_byte_length = Some(val); + } + 6 => { + let val = i32::read_thrift(&mut *prot)?; + repetition_levels_byte_length = Some(val); + } + 7 => { + let val = field_ident.bool_val.unwrap(); + is_compressed = Some(val); + } + _ => { + prot.skip(field_ident.field_type)?; + } + }; + last_field_id = field_ident.id; + } + let Some(num_values) = num_values else { + return Err(ParquetError::General( + "Required field num_values is missing".to_owned(), + )); + }; + let Some(num_nulls) = num_nulls else { + return Err(ParquetError::General( + "Required field num_nulls is missing".to_owned(), + )); + }; + let Some(num_rows) = num_rows else { + return Err(ParquetError::General( + "Required field num_rows is missing".to_owned(), + )); + }; + let Some(encoding) = encoding else { + return Err(ParquetError::General( + "Required field encoding is missing".to_owned(), + )); + }; + let Some(definition_levels_byte_length) = definition_levels_byte_length else { + return Err(ParquetError::General( + "Required field definition_levels_byte_length is missing".to_owned(), + )); + }; + let Some(repetition_levels_byte_length) = repetition_levels_byte_length else { + return Err(ParquetError::General( + "Required field repetition_levels_byte_length is missing".to_owned(), + )); + }; + Ok(Self { + num_values, + num_nulls, + num_rows, + encoding, + definition_levels_byte_length, + repetition_levels_byte_length, + is_compressed, + statistics, + }) + } +} + +thrift_struct!( +pub(crate) struct PageHeader { + /// the type of the page: indicates which of the *_header fields is set + 1: required PageType type_ + + /// Uncompressed page size in bytes (not including this header) + 2: required i32 uncompressed_page_size + + /// Compressed (and potentially encrypted) page size in bytes, not including this header + 3: required i32 compressed_page_size + + /// The 32-bit CRC checksum for the page, to be be calculated as follows: + 4: optional i32 crc + + // Headers for page specific data. One only will be set. + 5: optional DataPageHeader data_page_header; + 6: optional IndexPageHeader index_page_header; + 7: optional DictionaryPageHeader dictionary_page_header; + 8: optional DataPageHeaderV2 data_page_header_v2; +} +); + +impl PageHeader { + // reader that skips reading page statistics. 
obtained by running + // `cargo expand -p parquet --all-features --lib file::metadata::thrift_gen` + // and modifying the impl of `read_thrift` + pub(crate) fn read_thrift_without_stats<'a, R>(prot: &mut R) -> Result + where + R: ThriftCompactInputProtocol<'a>, + { + let mut type_: Option = None; + let mut uncompressed_page_size: Option = None; + let mut compressed_page_size: Option = None; + let mut crc: Option = None; + let mut data_page_header: Option = None; + let mut index_page_header: Option = None; + let mut dictionary_page_header: Option = None; + let mut data_page_header_v2: Option = None; + let mut last_field_id = 0i16; + loop { + let field_ident = prot.read_field_begin(last_field_id)?; + if field_ident.field_type == FieldType::Stop { + break; + } + match field_ident.id { + 1 => { + let val = PageType::read_thrift(&mut *prot)?; + type_ = Some(val); + } + 2 => { + let val = i32::read_thrift(&mut *prot)?; + uncompressed_page_size = Some(val); + } + 3 => { + let val = i32::read_thrift(&mut *prot)?; + compressed_page_size = Some(val); + } + 4 => { + let val = i32::read_thrift(&mut *prot)?; + crc = Some(val); + } + 5 => { + let val = DataPageHeader::read_thrift_without_stats(&mut *prot)?; + data_page_header = Some(val); + } + 6 => { + let val = IndexPageHeader::read_thrift(&mut *prot)?; + index_page_header = Some(val); + } + 7 => { + let val = DictionaryPageHeader::read_thrift(&mut *prot)?; + dictionary_page_header = Some(val); + } + 8 => { + let val = DataPageHeaderV2::read_thrift_without_stats(&mut *prot)?; + data_page_header_v2 = Some(val); + } + _ => { + prot.skip(field_ident.field_type)?; + } + }; + last_field_id = field_ident.id; + } + let Some(type_) = type_ else { + return Err(ParquetError::General( + "Required field type_ is missing".to_owned(), + )); + }; + let Some(uncompressed_page_size) = uncompressed_page_size else { + return Err(ParquetError::General( + "Required field uncompressed_page_size is missing".to_owned(), + )); + }; + let Some(compressed_page_size) = compressed_page_size else { + return Err(ParquetError::General( + "Required field compressed_page_size is missing".to_owned(), + )); + }; + Ok(Self { + type_, + uncompressed_page_size, + compressed_page_size, + crc, + data_page_header, + index_page_header, + dictionary_page_header, + data_page_header_v2, + }) + } +} + #[cfg(test)] mod tests { use crate::file::metadata::thrift_gen::BoundingBox; diff --git a/parquet/src/file/page_encoding_stats.rs b/parquet/src/file/page_encoding_stats.rs index 934e177de0da..3f81353e28dd 100644 --- a/parquet/src/file/page_encoding_stats.rs +++ b/parquet/src/file/page_encoding_stats.rs @@ -20,7 +20,7 @@ use std::io::Write; use crate::basic::{Encoding, PageType}; -use crate::errors::Result; +use crate::errors::{ParquetError, Result}; use crate::parquet_thrift::{ ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, ThriftCompactOutputProtocol, WriteThrift, WriteThriftField, diff --git a/parquet/src/file/properties.rs b/parquet/src/file/properties.rs index cb6b5167c8dd..b6003dc4d9dc 100644 --- a/parquet/src/file/properties.rs +++ b/parquet/src/file/properties.rs @@ -1191,6 +1191,7 @@ impl ColumnProperties { pub type ReaderPropertiesPtr = Arc; const DEFAULT_READ_BLOOM_FILTER: bool = false; +const DEFAULT_READ_PAGE_STATS: bool = false; /// Configuration settings for reading parquet files. 
/// @@ -1213,6 +1214,7 @@ const DEFAULT_READ_BLOOM_FILTER: bool = false; pub struct ReaderProperties { codec_options: CodecOptions, read_bloom_filter: bool, + read_page_stats: bool, } impl ReaderProperties { @@ -1230,6 +1232,11 @@ impl ReaderProperties { pub(crate) fn read_bloom_filter(&self) -> bool { self.read_bloom_filter } + + /// Returns whether to read page level statistics + pub(crate) fn read_page_stats(&self) -> bool { + self.read_page_stats + } } /// Builder for parquet file reader configuration. See example on @@ -1237,6 +1244,7 @@ impl ReaderProperties { pub struct ReaderPropertiesBuilder { codec_options_builder: CodecOptionsBuilder, read_bloom_filter: Option, + read_page_stats: Option, } /// Reader properties builder. @@ -1246,6 +1254,7 @@ impl ReaderPropertiesBuilder { Self { codec_options_builder: CodecOptionsBuilder::default(), read_bloom_filter: None, + read_page_stats: None, } } @@ -1254,6 +1263,7 @@ impl ReaderPropertiesBuilder { ReaderProperties { codec_options: self.codec_options_builder.build(), read_bloom_filter: self.read_bloom_filter.unwrap_or(DEFAULT_READ_BLOOM_FILTER), + read_page_stats: self.read_page_stats.unwrap_or(DEFAULT_READ_PAGE_STATS), } } @@ -1282,6 +1292,20 @@ impl ReaderPropertiesBuilder { self.read_bloom_filter = Some(value); self } + + /// Enable/disable reading page-level statistics + /// + /// If set to `true`, then the reader will decode and populate the [`Statistics`] for + /// each page, if present. + /// If set to `false`, then the reader will skip decoding the statistics. + /// + /// By default statistics will not be decoded. + /// + /// [`Statistics`]: crate::file::statistics::Statistics + pub fn set_read_page_statistics(mut self, value: bool) -> Self { + self.read_page_stats = Some(value); + self + } } #[cfg(test)] diff --git a/parquet/src/file/serialized_reader.rs b/parquet/src/file/serialized_reader.rs index 728598045315..1442f0f67ca0 100644 --- a/parquet/src/file/serialized_reader.rs +++ b/parquet/src/file/serialized_reader.rs @@ -18,31 +18,30 @@ //! Contains implementations of the reader traits FileReader, RowGroupReader and PageReader //! 
Also contains implementations of the ChunkReader for files (with buffering) and byte arrays (RAM) -use crate::basic::{Encoding, Type}; +use crate::basic::{PageType, Type}; use crate::bloom_filter::Sbbf; use crate::column::page::{Page, PageMetadata, PageReader}; use crate::compression::{create_codec, Codec}; #[cfg(feature = "encryption")] use crate::encryption::decrypt::{read_and_decrypt, CryptoContext}; use crate::errors::{ParquetError, Result}; +use crate::file::metadata::thrift_gen::PageHeader; use crate::file::page_index::offset_index::{OffsetIndexMetaData, PageLocation}; +use crate::file::statistics; use crate::file::{ metadata::*, properties::{ReaderProperties, ReaderPropertiesPtr}, reader::*, - statistics, }; -use crate::format::{PageHeader, PageType}; +#[cfg(feature = "encryption")] +use crate::parquet_thrift::ThriftSliceInputProtocol; +use crate::parquet_thrift::{ReadThrift, ThriftReadInputProtocol}; use crate::record::reader::RowIter; use crate::record::Row; use crate::schema::types::Type as SchemaType; -#[cfg(feature = "encryption")] -use crate::thrift::TCompactSliceInputProtocol; -use crate::thrift::TSerializable; use bytes::Bytes; use std::collections::VecDeque; use std::{fs::File, io::Read, path::Path, sync::Arc}; -use thrift::protocol::TCompactInputProtocol; impl TryFrom for SerializedFileReader { type Error = ParquetError; @@ -423,7 +422,7 @@ pub(crate) fn decode_page( Page::DictionaryPage { buf: buffer, num_values: dict_header.num_values.try_into()?, - encoding: Encoding::try_from(dict_header.encoding)?, + encoding: dict_header.encoding, is_sorted, } } @@ -434,10 +433,10 @@ pub(crate) fn decode_page( Page::DataPage { buf: buffer, num_values: header.num_values.try_into()?, - encoding: Encoding::try_from(header.encoding)?, - def_level_encoding: Encoding::try_from(header.definition_level_encoding)?, - rep_level_encoding: Encoding::try_from(header.repetition_level_encoding)?, - statistics: statistics::from_thrift(physical_type, header.statistics)?, + encoding: header.encoding, + def_level_encoding: header.definition_level_encoding, + rep_level_encoding: header.repetition_level_encoding, + statistics: statistics::from_thrift_page_stats(physical_type, header.statistics)?, } } PageType::DATA_PAGE_V2 => { @@ -448,13 +447,13 @@ pub(crate) fn decode_page( Page::DataPageV2 { buf: buffer, num_values: header.num_values.try_into()?, - encoding: Encoding::try_from(header.encoding)?, + encoding: header.encoding, num_nulls: header.num_nulls.try_into()?, num_rows: header.num_rows.try_into()?, def_levels_byte_len: header.definition_levels_byte_length.try_into()?, rep_levels_byte_len: header.repetition_levels_byte_length.try_into()?, is_compressed, - statistics: statistics::from_thrift(physical_type, header.statistics)?, + statistics: statistics::from_thrift_page_stats(physical_type, header.statistics)?, } } _ => { @@ -499,6 +498,8 @@ enum SerializedPageReaderState { #[derive(Default)] struct SerializedPageReaderContext { + /// Controls decoding of page-level statistics + read_stats: bool, /// Crypto context carrying objects required for decryption #[cfg(feature = "encryption")] crypto_context: Option>, @@ -610,12 +611,16 @@ impl SerializedPageReader { require_dictionary: meta.dictionary_page_offset().is_some(), }, }; + let mut context = SerializedPageReaderContext::default(); + if props.read_page_stats() { + context.read_stats = true; + } Ok(Self { reader, decompressor, state, physical_type: meta.column_type(), - context: Default::default(), + context, }) } @@ -732,8 +737,12 @@ impl 
SerializedPageReaderContext { _page_index: usize, _dictionary_page: bool, ) -> Result { - let mut prot = TCompactInputProtocol::new(input); - Ok(PageHeader::read_from_in_protocol(&mut prot)?) + let mut prot = ThriftReadInputProtocol::new(input); + if self.read_stats { + Ok(PageHeader::read_thrift(&mut prot)?) + } else { + Ok(PageHeader::read_thrift_without_stats(&mut prot)?) + } } fn decrypt_page_data( @@ -756,8 +765,14 @@ impl SerializedPageReaderContext { ) -> Result { match self.page_crypto_context(page_index, dictionary_page) { None => { - let mut prot = TCompactInputProtocol::new(input); - Ok(PageHeader::read_from_in_protocol(&mut prot)?) + let mut prot = ThriftReadInputProtocol::new(input); + if self.read_stats { + Ok(PageHeader::read_thrift(&mut prot)?) + } else { + use crate::file::metadata::thrift_gen::PageHeader; + + Ok(PageHeader::read_thrift_without_stats(&mut prot)?) + } } Some(page_crypto_context) => { let data_decryptor = page_crypto_context.data_decryptor(); @@ -770,8 +785,12 @@ impl SerializedPageReaderContext { )) })?; - let mut prot = TCompactSliceInputProtocol::new(buf.as_slice()); - Ok(PageHeader::read_from_in_protocol(&mut prot)?) + let mut prot = ThriftSliceInputProtocol::new(buf.as_slice()); + if self.read_stats { + Ok(PageHeader::read_thrift(&mut prot)?) + } else { + Ok(PageHeader::read_thrift_without_stats(&mut prot)?) + } } } } @@ -1107,7 +1126,7 @@ mod tests { }; use crate::file::properties::{EnabledStatistics, WriterProperties}; - use crate::basic::{self, BoundaryOrder, ColumnOrder, SortOrder}; + use crate::basic::{self, BoundaryOrder, ColumnOrder, Encoding, SortOrder}; use crate::column::reader::ColumnReader; use crate::data_type::private::ParquetValueType; use crate::data_type::{AsBytes, FixedLenByteArrayType, Int32Type}; @@ -1396,7 +1415,7 @@ mod tests { assert_eq!(def_levels_byte_len, 2); assert_eq!(rep_levels_byte_len, 0); assert!(is_compressed); - assert!(statistics.is_some()); + assert!(statistics.is_none()); // page stats are no longer read true } _ => false, @@ -1498,7 +1517,7 @@ mod tests { assert_eq!(def_levels_byte_len, 2); assert_eq!(rep_levels_byte_len, 0); assert!(is_compressed); - assert!(statistics.is_some()); + assert!(statistics.is_none()); // page stats are no longer read true } _ => false, diff --git a/parquet/src/file/statistics.rs b/parquet/src/file/statistics.rs index d4501830ac40..e51f445b7e7e 100644 --- a/parquet/src/file/statistics.rs +++ b/parquet/src/file/statistics.rs @@ -45,6 +45,7 @@ use crate::basic::Type; use crate::data_type::private::ParquetValueType; use crate::data_type::*; use crate::errors::{ParquetError, Result}; +use crate::file::metadata::thrift_gen::PageStatistics; use crate::util::bit_util::FromBytes; pub(crate) mod private { @@ -117,6 +118,7 @@ macro_rules! statistics_enum_func { }}; } +// FIXME(ets): remove this when done with format changes /// Converts Thrift definition into `Statistics`. pub fn from_thrift( physical_type: Type, @@ -266,6 +268,156 @@ pub fn from_thrift( }) } +/// Converts Thrift definition into `Statistics`. +pub(crate) fn from_thrift_page_stats( + physical_type: Type, + thrift_stats: Option, +) -> Result> { + Ok(match thrift_stats { + Some(stats) => { + // Number of nulls recorded, when it is not available, we just mark it as 0. + // TODO this should be `None` if there is no information about NULLS. 
+ // see https://github.com/apache/arrow-rs/pull/6216/files + let null_count = stats.null_count.unwrap_or(0); + + if null_count < 0 { + return Err(ParquetError::General(format!( + "Statistics null count is negative {null_count}", + ))); + } + + // Generic null count. + let null_count = Some(null_count as u64); + // Generic distinct count (count of distinct values occurring) + let distinct_count = stats.distinct_count.map(|value| value as u64); + // Whether or not statistics use deprecated min/max fields. + let old_format = stats.min_value.is_none() && stats.max_value.is_none(); + // Generic min value as bytes. + let min = if old_format { + stats.min + } else { + stats.min_value + }; + // Generic max value as bytes. + let max = if old_format { + stats.max + } else { + stats.max_value + }; + + fn check_len(min: &Option>, max: &Option>, len: usize) -> Result<()> { + if let Some(min) = min { + if min.len() < len { + return Err(ParquetError::General( + "Insufficient bytes to parse min statistic".to_string(), + )); + } + } + if let Some(max) = max { + if max.len() < len { + return Err(ParquetError::General( + "Insufficient bytes to parse max statistic".to_string(), + )); + } + } + Ok(()) + } + + match physical_type { + Type::BOOLEAN => check_len(&min, &max, 1), + Type::INT32 | Type::FLOAT => check_len(&min, &max, 4), + Type::INT64 | Type::DOUBLE => check_len(&min, &max, 8), + Type::INT96 => check_len(&min, &max, 12), + _ => Ok(()), + }?; + + // Values are encoded using PLAIN encoding definition, except that + // variable-length byte arrays do not include a length prefix. + // + // Instead of using actual decoder, we manually convert values. + let res = match physical_type { + Type::BOOLEAN => Statistics::boolean( + min.map(|data| data[0] != 0), + max.map(|data| data[0] != 0), + distinct_count, + null_count, + old_format, + ), + Type::INT32 => Statistics::int32( + min.map(|data| i32::from_le_bytes(data[..4].try_into().unwrap())), + max.map(|data| i32::from_le_bytes(data[..4].try_into().unwrap())), + distinct_count, + null_count, + old_format, + ), + Type::INT64 => Statistics::int64( + min.map(|data| i64::from_le_bytes(data[..8].try_into().unwrap())), + max.map(|data| i64::from_le_bytes(data[..8].try_into().unwrap())), + distinct_count, + null_count, + old_format, + ), + Type::INT96 => { + // INT96 statistics may not be correct, because comparison is signed + let min = if let Some(data) = min { + assert_eq!(data.len(), 12); + Some(Int96::try_from_le_slice(&data)?) + } else { + None + }; + let max = if let Some(data) = max { + assert_eq!(data.len(), 12); + Some(Int96::try_from_le_slice(&data)?) 
+ } else { + None + }; + Statistics::int96(min, max, distinct_count, null_count, old_format) + } + Type::FLOAT => Statistics::float( + min.map(|data| f32::from_le_bytes(data[..4].try_into().unwrap())), + max.map(|data| f32::from_le_bytes(data[..4].try_into().unwrap())), + distinct_count, + null_count, + old_format, + ), + Type::DOUBLE => Statistics::double( + min.map(|data| f64::from_le_bytes(data[..8].try_into().unwrap())), + max.map(|data| f64::from_le_bytes(data[..8].try_into().unwrap())), + distinct_count, + null_count, + old_format, + ), + Type::BYTE_ARRAY => Statistics::ByteArray( + ValueStatistics::new( + min.map(ByteArray::from), + max.map(ByteArray::from), + distinct_count, + null_count, + old_format, + ) + .with_max_is_exact(stats.is_max_value_exact.unwrap_or(false)) + .with_min_is_exact(stats.is_min_value_exact.unwrap_or(false)), + ), + Type::FIXED_LEN_BYTE_ARRAY => Statistics::FixedLenByteArray( + ValueStatistics::new( + min.map(ByteArray::from).map(FixedLenByteArray::from), + max.map(ByteArray::from).map(FixedLenByteArray::from), + distinct_count, + null_count, + old_format, + ) + .with_max_is_exact(stats.is_max_value_exact.unwrap_or(false)) + .with_min_is_exact(stats.is_min_value_exact.unwrap_or(false)), + ), + }; + + Some(res) + } + None => None, + }) +} + +// FIXME(ets): remove when done with format changes /// Convert Statistics into Thrift definition. pub fn to_thrift(stats: Option<&Statistics>) -> Option { let stats = stats?; @@ -315,6 +467,55 @@ pub fn to_thrift(stats: Option<&Statistics>) -> Option) -> Option { + let stats = stats?; + + // record null count if it can fit in i64 + let null_count = stats + .null_count_opt() + .and_then(|value| i64::try_from(value).ok()); + + // record distinct count if it can fit in i64 + let distinct_count = stats + .distinct_count_opt() + .and_then(|value| i64::try_from(value).ok()); + + let mut thrift_stats = PageStatistics { + max: None, + min: None, + null_count, + distinct_count, + max_value: None, + min_value: None, + is_max_value_exact: None, + is_min_value_exact: None, + }; + + // Get min/max if set. + let (min, max, min_exact, max_exact) = ( + stats.min_bytes_opt().map(|x| x.to_vec()), + stats.max_bytes_opt().map(|x| x.to_vec()), + Some(stats.min_is_exact()), + Some(stats.max_is_exact()), + ); + if stats.is_min_max_backwards_compatible() { + // Copy to deprecated min, max values for compatibility with older readers + thrift_stats.min.clone_from(&min); + thrift_stats.max.clone_from(&max); + } + + if !stats.is_min_max_deprecated() { + thrift_stats.min_value = min; + thrift_stats.max_value = max; + } + + thrift_stats.is_min_value_exact = min_exact; + thrift_stats.is_max_value_exact = max_exact; + + Some(thrift_stats) +} + /// Strongly typed statistics for a column chunk within a row group. /// /// This structure is a natively typed, in memory representation of the diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs index 158b2a21b7cd..71881b00ff55 100644 --- a/parquet/src/file/writer.rs +++ b/parquet/src/file/writer.rs @@ -19,13 +19,13 @@ //! using row group writers and column writers respectively. 
use crate::bloom_filter::Sbbf; +use crate::file::metadata::thrift_gen::PageHeader; use crate::file::page_index::index::Index; use crate::file::page_index::offset_index::OffsetIndexMetaData; -use crate::thrift::TSerializable; +use crate::parquet_thrift::{ThriftCompactOutputProtocol, WriteThrift}; use std::fmt::Debug; use std::io::{BufWriter, IoSlice, Read}; use std::{io::Write, sync::Arc}; -use thrift::protocol::TCompactOutputProtocol; use crate::column::page_encryption::PageEncryptor; use crate::column::writer::{get_typed_column_writer_mut, ColumnCloseResult, ColumnWriterImpl}; @@ -939,15 +939,15 @@ impl<'a, W: Write> SerializedPageWriter<'a, W> { /// Serializes page header into Thrift. /// Returns number of bytes that have been written into the sink. #[inline] - fn serialize_page_header(&mut self, header: crate::format::PageHeader) -> Result { + fn serialize_page_header(&mut self, header: PageHeader) -> Result { let start_pos = self.sink.bytes_written(); match self.page_encryptor_and_sink_mut() { Some((page_encryptor, sink)) => { page_encryptor.encrypt_page_header(&header, sink)?; } None => { - let mut protocol = TCompactOutputProtocol::new(&mut self.sink); - header.write_to_out_protocol(&mut protocol)?; + let mut protocol = ThriftCompactOutputProtocol::new(&mut self.sink); + header.write_thrift(&mut protocol)?; } } Ok(self.sink.bytes_written() - start_pos) @@ -1602,6 +1602,7 @@ mod tests { let props = ReaderProperties::builder() .set_backward_compatible_lz4(false) + .set_read_page_statistics(true) .build(); let mut page_reader = SerializedPageReader::new_with_properties( Arc::new(reader), diff --git a/parquet/src/parquet_macros.rs b/parquet/src/parquet_macros.rs index 889e5fafef60..5720fd4ce0e7 100644 --- a/parquet/src/parquet_macros.rs +++ b/parquet/src/parquet_macros.rs @@ -323,6 +323,66 @@ macro_rules! thrift_struct { } } +/// only implements ReadThrift for the give IDL struct definition +#[macro_export] +macro_rules! thrift_struct_read_impl { + ($(#[$($def_attrs:tt)*])* $vis:vis struct $identifier:ident $(< $lt:lifetime >)? { $($(#[$($field_attrs:tt)*])* $field_id:literal : $required_or_optional:ident $field_type:ident $(< $field_lt:lifetime >)? $(< $element_type:ident >)? $field_name:ident $(= $default_value:literal)? $(;)?)* }) => { + $(#[cfg_attr(not(doctest), $($def_attrs)*)])* + impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for $identifier $(<$lt>)? { + fn read_thrift(prot: &mut R) -> Result { + $(let mut $field_name: Option<$crate::__thrift_field_type!($field_type $($field_lt)? $($element_type)?)> = None;)* + let mut last_field_id = 0i16; + loop { + let field_ident = prot.read_field_begin(last_field_id)?; + if field_ident.field_type == FieldType::Stop { + break; + } + match field_ident.id { + $($field_id => { + let val = $crate::__thrift_read_field!(prot, field_ident, $field_type $($field_lt)? $($element_type)?); + $field_name = Some(val); + })* + _ => { + prot.skip(field_ident.field_type)?; + } + }; + last_field_id = field_ident.id; + } + $($crate::__thrift_result_required_or_optional!($required_or_optional $field_name);)* + Ok(Self { + $($field_name),* + }) + } + } + } +} + +/// only implements WriteThrift for the give IDL struct definition +#[macro_export] +macro_rules! thrift_struct_write_impl { + ($(#[$($def_attrs:tt)*])* $vis:vis struct $identifier:ident $(< $lt:lifetime >)? { $($(#[$($field_attrs:tt)*])* $field_id:literal : $required_or_optional:ident $field_type:ident $(< $field_lt:lifetime >)? $(< $element_type:ident >)? 
$field_name:ident $(= $default_value:literal)? $(;)?)* }) => { + impl $(<$lt>)? WriteThrift for $identifier $(<$lt>)? { + const ELEMENT_TYPE: ElementType = ElementType::Struct; + + #[allow(unused_assignments)] + fn write_thrift(&self, writer: &mut ThriftCompactOutputProtocol) -> Result<()> { + #[allow(unused_mut, unused_variables)] + let mut last_field_id = 0i16; + $($crate::__thrift_write_required_or_optional_field!($required_or_optional $field_name, $field_id, $field_type, self, writer, last_field_id);)* + writer.write_struct_end() + } + } + + impl $(<$lt>)? WriteThriftField for $identifier $(<$lt>)? { + fn write_thrift_field(&self, writer: &mut ThriftCompactOutputProtocol, field_id: i16, last_field_id: i16) -> Result { + writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?; + self.write_thrift(writer)?; + Ok(field_id) + } + } + } +} + #[doc(hidden)] #[macro_export] macro_rules! __thrift_write_required_or_optional_field { @@ -391,15 +451,19 @@ macro_rules! __thrift_required_or_optional { (optional $field_type:ty) => { Option<$field_type> }; } +// Performance note: using `expect` here is about 4% faster on the page index bench, +// but we want to propagate errors. Using `ok_or` is *much* slower. #[doc(hidden)] #[macro_export] macro_rules! __thrift_result_required_or_optional { (required $field_name:ident) => { - let $field_name = $field_name.expect(concat!( - "Required field ", - stringify!($field_name), - " is missing", - )); + let Some($field_name) = $field_name else { + return Err(general_err!(concat!( + "Required field ", + stringify!($field_name), + " is missing", + ))); + }; }; (optional $field_name:ident) => {}; } @@ -433,7 +497,7 @@ macro_rules! __thrift_read_field { }; ($prot:tt, $field_ident:tt, binary) => { // this one needs to not conflict with `list` - $prot.read_bytes()?.to_vec() + $prot.read_bytes_owned()? }; ($prot:tt, $field_ident:tt, double) => { $crate::parquet_thrift::OrderedF64::read_thrift(&mut *$prot)? diff --git a/parquet/src/parquet_thrift.rs b/parquet/src/parquet_thrift.rs index 17847d0b71e5..5d549f012c86 100644 --- a/parquet/src/parquet_thrift.rs +++ b/parquet/src/parquet_thrift.rs @@ -20,7 +20,10 @@ // to not allocate byte arrays or strings. #![allow(dead_code)] -use std::{cmp::Ordering, io::Write}; +use std::{ + cmp::Ordering, + io::{Read, Write}, +}; use crate::errors::{ParquetError, Result}; @@ -197,6 +200,8 @@ pub(crate) trait ThriftCompactInputProtocol<'a> { /// [binary]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#binary-encoding fn read_bytes(&mut self) -> Result<&'a [u8]>; + fn read_bytes_owned(&mut self) -> Result>; + /// Skip the next `n` bytes of input. fn skip_bytes(&mut self, n: usize) -> Result<()>; @@ -459,6 +464,10 @@ impl<'b, 'a: 'b> ThriftCompactInputProtocol<'b> for ThriftSliceInputProtocol<'a> Ok(ret) } + fn read_bytes_owned(&mut self) -> Result> { + Ok(self.read_bytes()?.to_vec()) + } + #[inline] fn skip_bytes(&mut self, n: usize) -> Result<()> { self.buf.get(..n).ok_or_else(eof_error)?; @@ -480,6 +489,54 @@ fn eof_error() -> ParquetError { eof_err!("Unexpected EOF") } +/// A Thrift input protocol that wraps a [`Read`] object. +/// +/// Note that this is only intended for use in reading Parquet page headers. This will panic +/// if Thrift `binary` data is encountered because a slice of that data cannot be returned. 
+pub(crate) struct ThriftReadInputProtocol { + reader: R, +} + +impl ThriftReadInputProtocol { + pub(crate) fn new(reader: R) -> Self { + Self { reader } + } +} + +impl<'a, R: Read> ThriftCompactInputProtocol<'a> for ThriftReadInputProtocol { + #[inline] + fn read_byte(&mut self) -> Result { + let mut buf = [0_u8; 1]; + self.reader.read_exact(&mut buf)?; + Ok(buf[0]) + } + + fn read_bytes(&mut self) -> Result<&'a [u8]> { + unimplemented!() + } + + fn read_bytes_owned(&mut self) -> Result> { + let len = self.read_vlq()? as usize; + let mut v = Vec::with_capacity(len); + std::io::copy(&mut self.reader.by_ref().take(len as u64), &mut v)?; + Ok(v) + } + + fn skip_bytes(&mut self, n: usize) -> Result<()> { + std::io::copy( + &mut self.reader.by_ref().take(n as u64), + &mut std::io::sink(), + )?; + Ok(()) + } + + fn read_double(&mut self) -> Result { + let mut buf = [0_u8; 8]; + self.reader.read_exact(&mut buf)?; + Ok(f64::from_le_bytes(buf)) + } +} + /// Trait implemented for objects that can be deserialized from a Thrift input stream. /// Implementations are provided for Thrift primitive types. pub(crate) trait ReadThrift<'a, R: ThriftCompactInputProtocol<'a>> { @@ -533,7 +590,7 @@ impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for &'a str { impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for String { fn read_thrift(prot: &mut R) -> Result { - Ok(prot.read_string()?.to_owned()) + Ok(String::from_utf8(prot.read_bytes_owned()?)?) } } diff --git a/parquet/src/thrift.rs b/parquet/src/thrift.rs index 580ac2d7dbd6..2492910a3115 100644 --- a/parquet/src/thrift.rs +++ b/parquet/src/thrift.rs @@ -36,15 +36,22 @@ pub trait TSerializable: Sized { // Public function to aid benchmarking. Reads Parquet `FileMetaData` encoded in `bytes`. #[doc(hidden)] pub fn bench_file_metadata(bytes: &bytes::Bytes) { - let mut input = TCompactSliceInputProtocol::new(bytes); - crate::format::FileMetaData::read_from_in_protocol(&mut input).unwrap(); + crate::file::metadata::thrift_gen::bench_file_metadata(bytes); } // Public function to aid benchmarking. Reads Parquet `PageHeader` encoded in `bytes`. #[doc(hidden)] pub fn bench_page_header(bytes: &bytes::Bytes) { - let mut prot = TCompactSliceInputProtocol::new(bytes); - crate::format::PageHeader::read_from_in_protocol(&mut prot).unwrap(); + use crate::parquet_thrift::ReadThrift; + let mut prot = crate::parquet_thrift::ThriftReadInputProtocol::new(bytes.as_ref()); + crate::file::metadata::thrift_gen::PageHeader::read_thrift(&mut prot).unwrap(); +} + +// Public function to aid benchmarking. Reads Parquet `PageHeader` encoded in `bytes`. 
+#[doc(hidden)] +pub fn bench_page_header_no_stats(bytes: &bytes::Bytes) { + let mut prot = crate::parquet_thrift::ThriftReadInputProtocol::new(bytes.as_ref()); + crate::file::metadata::thrift_gen::PageHeader::read_thrift_without_stats(&mut prot).unwrap(); } /// A more performant implementation of [`TCompactInputProtocol`] that reads a slice diff --git a/parquet/tests/arrow_reader/bad_data.rs b/parquet/tests/arrow_reader/bad_data.rs index ecf449a7ce61..be401030e7f9 100644 --- a/parquet/tests/arrow_reader/bad_data.rs +++ b/parquet/tests/arrow_reader/bad_data.rs @@ -101,7 +101,7 @@ fn test_arrow_gh_41317() { let err = read_file("ARROW-GH-41317.parquet").unwrap_err(); assert_eq!( err.to_string(), - "External: Parquet argument error: External: bad data" + "External: Parquet argument error: Parquet error: StructArrayReader out of sync in read_records, expected 5 read, got 2" ); } diff --git a/parquet/tests/encryption/encryption_agnostic.rs b/parquet/tests/encryption/encryption_agnostic.rs index e071471712f4..48b5c77d9b97 100644 --- a/parquet/tests/encryption/encryption_agnostic.rs +++ b/parquet/tests/encryption/encryption_agnostic.rs @@ -72,7 +72,7 @@ pub fn read_plaintext_footer_file_without_decryption_properties() { match record_reader.next() { Some(Err(ArrowError::ParquetError(s))) => { - assert!(s.contains("protocol error")); + assert!(s.contains("Parquet error")); } _ => { panic!("Expected ArrowError::ParquetError"); @@ -137,7 +137,7 @@ pub async fn read_plaintext_footer_file_without_decryption_properties_async() { match record_reader.next().await { Some(Err(ParquetError::ArrowError(s))) => { - assert!(s.contains("protocol error")); + assert!(s.contains("Parquet error")); } _ => { panic!("Expected ArrowError::ParquetError"); From b0cc25486aeee21d886b99d7e677abb0f18ed2bb Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Thu, 25 Sep 2025 08:57:38 -0700 Subject: [PATCH 12/15] [thrift-remodel] Write Parquet page indexes (#8427) # Which issue does this PR close? **Note: this targets a feature branch, not main** - Closes #5854. # Rationale for this change Continues the remodel by implementing writing of the page index structures. # What changes are included in this PR? This PR removes the old `parquet::file::page_index::Index` enum and replaces with the new `ColumnIndexMetaData` struct. # Are these changes tested? Covered by existing tests # Are there any user-facing changes? Yes. 
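For downstream users, a rough migration sketch (the accessor names come from the test updates in this diff; `close_result` and the `INT32` column type are hypothetical):

```rust
use parquet::file::page_index::column_index::ColumnIndexMetaData;

// Previously, per-page statistics were reached through the `Index` enum's
// `indexes` Vec (e.g. `col_idx.indexes[0].min()`). The variant payload now
// exposes per-page accessors keyed by page ordinal instead:
if let Some(ColumnIndexMetaData::INT32(col_idx)) = &close_result.column_index {
    for page in 0..col_idx.num_pages() {
        if !col_idx.is_null_page(page) {
            let _min = col_idx.min_value(page); // Option over the native type
            let _max = col_idx.max_value(page);
            let _nulls = col_idx.null_count(page);
        }
    }
}
```

The `BYTE_ARRAY` and `FIXED_LEN_BYTE_ARRAY` variants follow the same pattern, with `min_value`/`max_value` returning byte slices (see the updated column writer tests below).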
--- parquet/src/arrow/arrow_reader/mod.rs | 2 +- parquet/src/arrow/arrow_reader/selection.rs | 2 +- parquet/src/column/writer/mod.rs | 76 ++-- parquet/src/file/metadata/memory.rs | 29 -- parquet/src/file/metadata/mod.rs | 164 +++---- parquet/src/file/metadata/writer.rs | 96 ++--- parquet/src/file/page_index/column_index.rs | 332 ++++++++++---- parquet/src/file/page_index/index.rs | 455 -------------------- parquet/src/file/page_index/index_reader.rs | 28 +- parquet/src/file/page_index/mod.rs | 1 - parquet/src/file/page_index/offset_index.rs | 8 - parquet/src/file/properties.rs | 2 +- parquet/src/file/statistics.rs | 9 +- parquet/src/file/writer.rs | 49 +-- 14 files changed, 415 insertions(+), 838 deletions(-) delete mode 100644 parquet/src/file/page_index/index.rs diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index 44e0441ac99f..ff221656a302 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -261,7 +261,7 @@ impl ArrowReaderBuilder { /// Skip 1100 (skip the remaining 900 rows in row group 2 and the first 200 rows in row group 3) /// ``` /// - /// [`Index`]: crate::file::page_index::index::Index + /// [`Index`]: crate::file::page_index::column_index::ColumnIndexMetaData pub fn with_row_selection(self, selection: RowSelection) -> Self { Self { selection: Some(selection), diff --git a/parquet/src/arrow/arrow_reader/selection.rs b/parquet/src/arrow/arrow_reader/selection.rs index 9f06dc184b6c..21ed97b8bde1 100644 --- a/parquet/src/arrow/arrow_reader/selection.rs +++ b/parquet/src/arrow/arrow_reader/selection.rs @@ -97,7 +97,7 @@ impl RowSelector { /// * It contains no [`RowSelector`] of 0 rows /// * Consecutive [`RowSelector`]s alternate skipping or selecting rows /// -/// [`PageIndex`]: crate::file::page_index::index::PageIndex +/// [`PageIndex`]: crate::file::page_index::column_index::ColumnIndexMetaData #[derive(Debug, Clone, Default, Eq, PartialEq)] pub struct RowSelection { selectors: Vec, diff --git a/parquet/src/column/writer/mod.rs b/parquet/src/column/writer/mod.rs index acfe0ce3d0ee..3f516462f20c 100644 --- a/parquet/src/column/writer/mod.rs +++ b/parquet/src/column/writer/mod.rs @@ -21,7 +21,7 @@ use bytes::Bytes; use half::f16; use crate::bloom_filter::Sbbf; -use crate::file::page_index::index::Index; +use crate::file::page_index::column_index::ColumnIndexMetaData; use crate::file::page_index::offset_index::OffsetIndexMetaData; use std::collections::{BTreeSet, VecDeque}; use std::str; @@ -192,7 +192,7 @@ pub struct ColumnCloseResult { /// Optional bloom filter for this column pub bloom_filter: Option, /// Optional column index, for filtering - pub column_index: Option, + pub column_index: Option, /// Optional offset index, identifying page locations pub offset_index: Option, } @@ -2959,28 +2959,22 @@ mod tests { assert!(r.column_index.is_some()); let col_idx = r.column_index.unwrap(); let col_idx = match col_idx { - Index::INT32(col_idx) => col_idx, + ColumnIndexMetaData::INT32(col_idx) => col_idx, _ => panic!("wrong stats type"), }; // null_pages should be true for page 0 - assert!(col_idx.indexes[0].is_null_page()); + assert!(col_idx.is_null_page(0)); // min and max should be empty byte arrays - assert!(col_idx.indexes[0].min().is_none()); - assert!(col_idx.indexes[0].max().is_none()); + assert!(col_idx.min_value(0).is_none()); + assert!(col_idx.max_value(0).is_none()); // null_counts should be defined and be 4 for page 0 - assert!(col_idx.indexes[0].null_count().is_some()); - 
assert_eq!(col_idx.indexes[0].null_count().unwrap(), 4); + assert!(col_idx.null_count(0).is_some()); + assert_eq!(col_idx.null_count(0), Some(4)); // there is no repetition so rep histogram should be absent - assert!(col_idx.indexes[0].repetition_level_histogram().is_none()); + assert!(col_idx.repetition_level_histogram(0).is_none()); // definition_level_histogram should be present and should be 0:4, 1:0 - assert!(col_idx.indexes[0].definition_level_histogram().is_some()); - assert_eq!( - col_idx.indexes[0] - .definition_level_histogram() - .unwrap() - .values(), - &[4, 0] - ); + assert!(col_idx.definition_level_histogram(0).is_some()); + assert_eq!(col_idx.definition_level_histogram(0).unwrap(), &[4, 0]); } #[test] @@ -3004,15 +2998,15 @@ mod tests { // column index let column_index = match column_index { - Index::INT32(column_index) => column_index, + ColumnIndexMetaData::INT32(column_index) => column_index, _ => panic!("wrong stats type"), }; - assert_eq!(2, column_index.indexes.len()); + assert_eq!(2, column_index.num_pages()); assert_eq!(2, offset_index.page_locations.len()); assert_eq!(BoundaryOrder::UNORDERED, column_index.boundary_order); for idx in 0..2 { - assert!(!column_index.indexes[idx].is_null_page()); - assert_eq!(0, *column_index.indexes[idx].null_count.as_ref().unwrap()); + assert!(!column_index.is_null_page(idx)); + assert_eq!(0, column_index.null_count(0).unwrap()); } if let Some(stats) = r.metadata.statistics() { @@ -3022,8 +3016,8 @@ mod tests { // first page is [1,2,3,4] // second page is [-5,2,4,8] // note that we don't increment here, as this is a non BinaryArray type. - assert_eq!(stats.min_opt(), column_index.indexes[1].min()); - assert_eq!(stats.max_opt(), column_index.indexes[1].max()); + assert_eq!(stats.min_opt(), column_index.min_value(1)); + assert_eq!(stats.max_opt(), column_index.max_value(1)); } else { panic!("expecting Statistics::Int32"); } @@ -3064,25 +3058,25 @@ mod tests { let offset_index = r.offset_index.unwrap(); let column_index = match column_index { - Index::FIXED_LEN_BYTE_ARRAY(column_index) => column_index, + ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(column_index) => column_index, _ => panic!("wrong stats type"), }; assert_eq!(3, r.rows_written); // column index - assert_eq!(1, column_index.indexes.len()); + assert_eq!(1, column_index.num_pages()); assert_eq!(1, offset_index.page_locations.len()); assert_eq!(BoundaryOrder::ASCENDING, column_index.boundary_order); - assert!(!column_index.indexes[0].is_null_page()); - assert_eq!(Some(0), column_index.indexes[0].null_count()); + assert!(!column_index.is_null_page(0)); + assert_eq!(Some(0), column_index.null_count(0)); if let Some(stats) = r.metadata.statistics() { assert_eq!(stats.null_count_opt(), Some(0)); assert_eq!(stats.distinct_count_opt(), None); if let Statistics::FixedLenByteArray(stats) = stats { - let column_index_min_value = column_index.indexes[0].min_bytes().unwrap(); - let column_index_max_value = column_index.indexes[0].max_bytes().unwrap(); + let column_index_min_value = column_index.min_value(0).unwrap(); + let column_index_max_value = column_index.max_value(0).unwrap(); // Column index stats are truncated, while the column chunk's aren't. 
assert_ne!(stats.min_bytes_opt().unwrap(), column_index_min_value); @@ -3135,25 +3129,25 @@ mod tests { let offset_index = r.offset_index.unwrap(); let column_index = match column_index { - Index::FIXED_LEN_BYTE_ARRAY(column_index) => column_index, + ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(column_index) => column_index, _ => panic!("wrong stats type"), }; assert_eq!(1, r.rows_written); // column index - assert_eq!(1, column_index.indexes.len()); + assert_eq!(1, column_index.num_pages()); assert_eq!(1, offset_index.page_locations.len()); assert_eq!(BoundaryOrder::ASCENDING, column_index.boundary_order); - assert!(!column_index.indexes[0].is_null_page()); - assert_eq!(Some(0), column_index.indexes[0].null_count()); + assert!(!column_index.is_null_page(0)); + assert_eq!(Some(0), column_index.null_count(0)); if let Some(stats) = r.metadata.statistics() { assert_eq!(stats.null_count_opt(), Some(0)); assert_eq!(stats.distinct_count_opt(), None); if let Statistics::FixedLenByteArray(_stats) = stats { - let column_index_min_value = column_index.indexes[0].min_bytes().unwrap(); - let column_index_max_value = column_index.indexes[0].max_bytes().unwrap(); + let column_index_min_value = column_index.min_value(0).unwrap(); + let column_index_max_value = column_index.max_value(0).unwrap(); assert_eq!(column_index_min_value.len(), 1); assert_eq!(column_index_max_value.len(), 1); @@ -3190,11 +3184,11 @@ mod tests { // ensure bytes weren't truncated for column index let column_index = r.column_index.unwrap(); let column_index = match column_index { - Index::FIXED_LEN_BYTE_ARRAY(column_index) => column_index, + ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(column_index) => column_index, _ => panic!("wrong stats type"), }; - let column_index_min_bytes = column_index.indexes[0].min_bytes().unwrap(); - let column_index_max_bytes = column_index.indexes[0].min_bytes().unwrap(); + let column_index_min_bytes = column_index.min_value(0).unwrap(); + let column_index_max_bytes = column_index.max_value(0).unwrap(); assert_eq!(expected_value, column_index_min_bytes); assert_eq!(expected_value, column_index_max_bytes); @@ -3233,11 +3227,11 @@ mod tests { // ensure bytes weren't truncated for column index let column_index = r.column_index.unwrap(); let column_index = match column_index { - Index::FIXED_LEN_BYTE_ARRAY(column_index) => column_index, + ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(column_index) => column_index, _ => panic!("wrong stats type"), }; - let column_index_min_bytes = column_index.indexes[0].min_bytes().unwrap(); - let column_index_max_bytes = column_index.indexes[0].min_bytes().unwrap(); + let column_index_min_bytes = column_index.min_value(0).unwrap(); + let column_index_max_bytes = column_index.max_value(0).unwrap(); assert_eq!(expected_value, column_index_min_bytes); assert_eq!(expected_value, column_index_max_bytes); diff --git a/parquet/src/file/metadata/memory.rs b/parquet/src/file/metadata/memory.rs index 69eee3c2999d..19122a1b5522 100644 --- a/parquet/src/file/metadata/memory.rs +++ b/parquet/src/file/metadata/memory.rs @@ -27,7 +27,6 @@ use crate::file::page_encoding_stats::PageEncodingStats; use crate::file::page_index::column_index::{ ByteArrayColumnIndex, ColumnIndex, ColumnIndexMetaData, PrimitiveColumnIndex, }; -use crate::file::page_index::index::{Index, NativeIndex, PageIndex}; use crate::file::page_index::offset_index::{OffsetIndexMetaData, PageLocation}; use crate::file::statistics::{Statistics, ValueStatistics}; use std::sync::Arc; @@ -199,34 +198,6 @@ impl HeapSize for 
ByteArrayColumnIndex { } } -impl HeapSize for Index { - fn heap_size(&self) -> usize { - match self { - Index::NONE => 0, - Index::BOOLEAN(native_index) => native_index.heap_size(), - Index::INT32(native_index) => native_index.heap_size(), - Index::INT64(native_index) => native_index.heap_size(), - Index::INT96(native_index) => native_index.heap_size(), - Index::FLOAT(native_index) => native_index.heap_size(), - Index::DOUBLE(native_index) => native_index.heap_size(), - Index::BYTE_ARRAY(native_index) => native_index.heap_size(), - Index::FIXED_LEN_BYTE_ARRAY(native_index) => native_index.heap_size(), - } - } -} - -impl HeapSize for NativeIndex { - fn heap_size(&self) -> usize { - self.indexes.heap_size() + self.boundary_order.heap_size() - } -} - -impl HeapSize for PageIndex { - fn heap_size(&self) -> usize { - self.min.heap_size() + self.max.heap_size() + self.null_count.heap_size() - } -} - impl HeapSize for ValueStatistics { fn heap_size(&self) -> usize { self.min_opt().map(T::heap_size).unwrap_or(0) diff --git a/parquet/src/file/metadata/mod.rs b/parquet/src/file/metadata/mod.rs index 95e9a48b46f3..caf001e5fa27 100644 --- a/parquet/src/file/metadata/mod.rs +++ b/parquet/src/file/metadata/mod.rs @@ -101,19 +101,18 @@ use crate::encryption::decrypt::FileDecryptor; #[cfg(feature = "encryption")] use crate::file::column_crypto_metadata::{self, ColumnCryptoMetaData}; pub(crate) use crate::file::metadata::memory::HeapSize; +use crate::file::page_index::column_index::{ByteArrayColumnIndex, PrimitiveColumnIndex}; +use crate::file::statistics::{self, Statistics}; use crate::file::{ page_encoding_stats::{self, PageEncodingStats}, page_index::{column_index::ColumnIndexMetaData, offset_index::PageLocation}, }; -use crate::file::{ - page_index::index::PageIndex, - statistics::{self, Statistics}, -}; use crate::format::ColumnCryptoMetaData as TColumnCryptoMetaData; use crate::schema::types::{ ColumnDescPtr, ColumnDescriptor, ColumnPath, SchemaDescPtr, SchemaDescriptor, Type as SchemaType, }; +use crate::thrift_struct; use crate::{ basic::BoundaryOrder, errors::{ParquetError, Result}, @@ -128,10 +127,6 @@ use crate::{ use crate::{ data_type::private::ParquetValueType, file::page_index::offset_index::OffsetIndexMetaData, }; -use crate::{ - file::page_index::index::{Index, NativeIndex}, - thrift_struct, -}; pub use push_decoder::ParquetMetaDataPushDecoder; pub use reader::{FooterTail, PageIndexPolicy, ParquetMetaDataReader}; @@ -145,18 +140,18 @@ pub(crate) use writer::ThriftMetadataWriter; /// /// This structure is an in-memory representation of multiple [`ColumnIndex`] /// structures in a parquet file footer, as described in the Parquet [PageIndex -/// documentation]. Each [`Index`] holds statistics about all the pages in a +/// documentation]. Each [`ColumnIndex`] holds statistics about all the pages in a /// particular column chunk. /// /// `column_index[row_group_number][column_number]` holds the -/// [`Index`] corresponding to column `column_number` of row group +/// [`ColumnIndex`] corresponding to column `column_number` of row group /// `row_group_number`. /// -/// For example `column_index[2][3]` holds the [`Index`] for the fourth +/// For example `column_index[2][3]` holds the [`ColumnIndex`] for the fourth /// column in the third row group of the parquet file. 
/// /// [PageIndex documentation]: https://github.com/apache/parquet-format/blob/master/PageIndex.md -/// [`ColumnIndex`]: crate::format::ColumnIndex +/// [`ColumnIndex`]: crate::file::page_index::column_index::ColumnIndexMetaData pub type ParquetColumnIndex = Vec>; /// [`OffsetIndexMetaData`] for each data page of each row group of each column @@ -1632,135 +1627,74 @@ impl ColumnIndexBuilder { /// Build and get the column index /// /// Note: callers should check [`Self::valid`] before calling this method - pub fn build(self) -> Result { + pub fn build(self) -> Result { Ok(match self.column_type { Type::BOOLEAN => { - let (indexes, boundary_order) = self.build_page_index()?; - Index::BOOLEAN(NativeIndex { - indexes, - boundary_order, - }) + let index = self.build_page_index()?; + ColumnIndexMetaData::BOOLEAN(index) } Type::INT32 => { - let (indexes, boundary_order) = self.build_page_index()?; - Index::INT32(NativeIndex { - indexes, - boundary_order, - }) + let index = self.build_page_index()?; + ColumnIndexMetaData::INT32(index) } Type::INT64 => { - let (indexes, boundary_order) = self.build_page_index()?; - Index::INT64(NativeIndex { - indexes, - boundary_order, - }) + let index = self.build_page_index()?; + ColumnIndexMetaData::INT64(index) } Type::INT96 => { - let (indexes, boundary_order) = self.build_page_index()?; - Index::INT96(NativeIndex { - indexes, - boundary_order, - }) + let index = self.build_page_index()?; + ColumnIndexMetaData::INT96(index) } Type::FLOAT => { - let (indexes, boundary_order) = self.build_page_index()?; - Index::FLOAT(NativeIndex { - indexes, - boundary_order, - }) + let index = self.build_page_index()?; + ColumnIndexMetaData::FLOAT(index) } Type::DOUBLE => { - let (indexes, boundary_order) = self.build_page_index()?; - Index::DOUBLE(NativeIndex { - indexes, - boundary_order, - }) + let index = self.build_page_index()?; + ColumnIndexMetaData::DOUBLE(index) } Type::BYTE_ARRAY => { - let (indexes, boundary_order) = self.build_page_index()?; - Index::BYTE_ARRAY(NativeIndex { - indexes, - boundary_order, - }) + let index = self.build_byte_array_index()?; + ColumnIndexMetaData::BYTE_ARRAY(index) } Type::FIXED_LEN_BYTE_ARRAY => { - let (indexes, boundary_order) = self.build_page_index()?; - Index::FIXED_LEN_BYTE_ARRAY(NativeIndex { - indexes, - boundary_order, - }) + let index = self.build_byte_array_index()?; + ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(index) } }) } - fn build_page_index(self) -> Result<(Vec>, BoundaryOrder)> + fn build_page_index(self) -> Result> where T: ParquetValueType, { - let len = self.min_values.len(); - - let null_counts = self - .null_counts - .iter() - .map(|x| Some(*x)) - .collect::>(); - - // histograms are a 1D array encoding a 2D num_pages X num_levels matrix. - let to_page_histograms = |opt_hist: Option>| { - if let Some(hist) = opt_hist { - // TODO: should we assert (hist.len() % len) == 0? 
- let num_levels = hist.len() / len; - let mut res = Vec::with_capacity(len); - for i in 0..len { - let page_idx = i * num_levels; - let page_hist = hist[page_idx..page_idx + num_levels].to_vec(); - res.push(Some(LevelHistogram::from(page_hist))); - } - res - } else { - vec![None; len] - } - }; + let min_values: Vec<&[u8]> = self.min_values.iter().map(|v| v.as_slice()).collect(); + let max_values: Vec<&[u8]> = self.max_values.iter().map(|v| v.as_slice()).collect(); - let rep_hists: Vec> = - to_page_histograms(self.repetition_level_histograms); - let def_hists: Vec> = - to_page_histograms(self.definition_level_histograms); + PrimitiveColumnIndex::try_new( + self.null_pages, + self.boundary_order, + Some(self.null_counts), + self.repetition_level_histograms, + self.definition_level_histograms, + min_values, + max_values, + ) + } - let indexes = self - .min_values - .iter() - .zip(self.max_values.iter()) - .zip(self.null_pages.into_iter()) - .zip(null_counts.into_iter()) - .zip(rep_hists.into_iter()) - .zip(def_hists.into_iter()) - .map( - |( - ((((min, max), is_null), null_count), repetition_level_histogram), - definition_level_histogram, - )| { - let (min, max) = if is_null { - (None, None) - } else { - ( - Some(T::try_from_le_slice(min)?), - Some(T::try_from_le_slice(max)?), - ) - }; - Ok(PageIndex { - min, - max, - null_count, - repetition_level_histogram, - definition_level_histogram, - }) - }, - ) - .collect::, ParquetError>>()?; + fn build_byte_array_index(self) -> Result { + let min_values: Vec<&[u8]> = self.min_values.iter().map(|v| v.as_slice()).collect(); + let max_values: Vec<&[u8]> = self.max_values.iter().map(|v| v.as_slice()).collect(); - let boundary_order = self.boundary_order; - Ok((indexes, boundary_order)) + ByteArrayColumnIndex::try_new( + self.null_pages, + self.boundary_order, + Some(self.null_counts), + self.repetition_level_histograms, + self.definition_level_histograms, + min_values, + max_values, + ) } } diff --git a/parquet/src/file/metadata/writer.rs b/parquet/src/file/metadata/writer.rs index 404bcf5dba8a..a09a703adef8 100644 --- a/parquet/src/file/metadata/writer.rs +++ b/parquet/src/file/metadata/writer.rs @@ -24,8 +24,6 @@ use crate::encryption::{ }; #[cfg(feature = "encryption")] use crate::errors::ParquetError; -use crate::file::metadata::{KeyValue, ParquetMetaData}; -use crate::file::writer::{get_file_magic, TrackedWrite}; use crate::format::EncryptionAlgorithm; #[cfg(feature = "encryption")] use crate::format::{AesGcmV1, ColumnCryptoMetaData}; @@ -33,6 +31,17 @@ use crate::schema::types; use crate::schema::types::{SchemaDescPtr, SchemaDescriptor, TypePtr}; use crate::thrift::TSerializable; use crate::{errors::Result, file::page_index::column_index::ColumnIndexMetaData}; +use crate::{ + file::writer::{get_file_magic, TrackedWrite}, + parquet_thrift::WriteThrift, +}; +use crate::{ + file::{ + metadata::{KeyValue, ParquetMetaData}, + page_index::offset_index::OffsetIndexMetaData, + }, + parquet_thrift::ThriftCompactOutputProtocol, +}; use std::io::Write; use std::sync::Arc; use thrift::protocol::TCompactOutputProtocol; @@ -45,8 +54,8 @@ pub(crate) struct ThriftMetadataWriter<'a, W: Write> { schema: &'a TypePtr, schema_descr: &'a SchemaDescPtr, row_groups: Vec, - column_indexes: Option<&'a [Vec>]>, - offset_indexes: Option<&'a [Vec>]>, + column_indexes: Option<&'a [Vec>]>, + offset_indexes: Option<&'a [Vec>]>, key_value_metadata: Option>, created_by: Option, object_writer: MetadataObjectWriter, @@ -61,7 +70,7 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> { 
/// of the serialized offset indexes. fn write_offset_indexes( &mut self, - offset_indexes: &[Vec>], + offset_indexes: &[Vec>], ) -> Result<()> { // iter row group // iter each column @@ -94,7 +103,7 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> { /// of the serialized column indexes. fn write_column_indexes( &mut self, - column_indexes: &[Vec>], + column_indexes: &[Vec>], ) -> Result<()> { // iter row group // iter each column @@ -214,7 +223,7 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> { pub fn with_column_indexes( mut self, - column_indexes: &'a [Vec>], + column_indexes: &'a [Vec>], ) -> Self { self.column_indexes = Some(column_indexes); self @@ -222,7 +231,7 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> { pub fn with_offset_indexes( mut self, - offset_indexes: &'a [Vec>], + offset_indexes: &'a [Vec>], ) -> Self { self.offset_indexes = Some(offset_indexes); self @@ -382,40 +391,14 @@ impl<'a, W: Write> ParquetMetaDataWriter<'a, W> { Ok(()) } - fn convert_column_indexes(&self) -> Vec>> { + fn convert_column_indexes(&self) -> Vec>> { if let Some(row_group_column_indexes) = self.metadata.column_index() { (0..self.metadata.row_groups().len()) .map(|rg_idx| { let column_indexes = &row_group_column_indexes[rg_idx]; column_indexes .iter() - .map(|column_index| match column_index { - ColumnIndexMetaData::NONE => None, - ColumnIndexMetaData::BOOLEAN(column_index) => { - Some(column_index.to_thrift()) - } - ColumnIndexMetaData::BYTE_ARRAY(column_index) => { - Some(column_index.to_thrift()) - } - ColumnIndexMetaData::DOUBLE(column_index) => { - Some(column_index.to_thrift()) - } - ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(column_index) => { - Some(column_index.to_thrift()) - } - ColumnIndexMetaData::FLOAT(column_index) => { - Some(column_index.to_thrift()) - } - ColumnIndexMetaData::INT32(column_index) => { - Some(column_index.to_thrift()) - } - ColumnIndexMetaData::INT64(column_index) => { - Some(column_index.to_thrift()) - } - ColumnIndexMetaData::INT96(column_index) => { - Some(column_index.to_thrift()) - } - }) + .map(|column_index| Some(column_index.clone())) .collect() }) .collect() @@ -429,14 +412,14 @@ impl<'a, W: Write> ParquetMetaDataWriter<'a, W> { } } - fn convert_offset_index(&self) -> Vec>> { + fn convert_offset_index(&self) -> Vec>> { if let Some(row_group_offset_indexes) = self.metadata.offset_index() { (0..self.metadata.row_groups().len()) .map(|rg_idx| { let offset_indexes = &row_group_offset_indexes[rg_idx]; offset_indexes .iter() - .map(|offset_index| Some(offset_index.to_thrift())) + .map(|offset_index| Some(offset_index.clone())) .collect() }) .collect() @@ -464,6 +447,13 @@ impl MetadataObjectWriter { object.write_to_out_protocol(&mut protocol)?; Ok(()) } + + #[inline] + fn write_thrift_object(object: &impl WriteThrift, sink: impl Write) -> Result<()> { + let mut protocol = ThriftCompactOutputProtocol::new(sink); + object.write_thrift(&mut protocol)?; + Ok(()) + } } /// Implementations of [`MetadataObjectWriter`] methods for when encryption is disabled @@ -481,25 +471,25 @@ impl MetadataObjectWriter { /// Write a column [`OffsetIndex`] in Thrift format fn write_offset_index( &self, - offset_index: &crate::format::OffsetIndex, + offset_index: &OffsetIndexMetaData, _column_chunk: &crate::format::ColumnChunk, _row_group_idx: usize, _column_idx: usize, sink: impl Write, ) -> Result<()> { - Self::write_object(offset_index, sink) + Self::write_thrift_object(offset_index, sink) } /// Write a column [`ColumnIndex`] in Thrift format fn write_column_index( &self, - 
column_index: &crate::format::ColumnIndex, + column_index: &ColumnIndexMetaData, _column_chunk: &crate::format::ColumnChunk, _row_group_idx: usize, _column_idx: usize, sink: impl Write, ) -> Result<()> { - Self::write_object(column_index, sink) + Self::write_thrift_object(column_index, sink) } /// No-op implementation of row-group metadata encryption @@ -568,14 +558,14 @@ impl MetadataObjectWriter { /// [`OffsetIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md fn write_offset_index( &self, - offset_index: &crate::format::OffsetIndex, + offset_index: &OffsetIndexMetaData, column_chunk: &crate::format::ColumnChunk, row_group_idx: usize, column_idx: usize, sink: impl Write, ) -> Result<()> { match &self.file_encryptor { - Some(file_encryptor) => Self::write_object_with_encryption( + Some(file_encryptor) => Self::write_thrift_object_with_encryption( offset_index, sink, file_encryptor, @@ -584,7 +574,7 @@ impl MetadataObjectWriter { row_group_idx, column_idx, ), - None => Self::write_object(offset_index, sink), + None => Self::write_thrift_object(offset_index, sink), } } @@ -593,14 +583,14 @@ impl MetadataObjectWriter { /// [`ColumnIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md fn write_column_index( &self, - column_index: &crate::format::ColumnIndex, + column_index: &ColumnIndexMetaData, column_chunk: &crate::format::ColumnChunk, row_group_idx: usize, column_idx: usize, sink: impl Write, ) -> Result<()> { match &self.file_encryptor { - Some(file_encryptor) => Self::write_object_with_encryption( + Some(file_encryptor) => Self::write_thrift_object_with_encryption( column_index, sink, file_encryptor, @@ -609,7 +599,7 @@ impl MetadataObjectWriter { row_group_idx, column_idx, ), - None => Self::write_object(column_index, sink), + None => Self::write_thrift_object(column_index, sink), } } @@ -642,8 +632,8 @@ impl MetadataObjectWriter { ) } - fn write_object_with_encryption( - object: &impl TSerializable, + fn write_thrift_object_with_encryption( + object: &impl WriteThrift, mut sink: impl Write, file_encryptor: &FileEncryptor, column_metadata: &crate::format::ColumnChunk, @@ -671,6 +661,8 @@ impl MetadataObjectWriter { }; if file_encryptor.is_column_encrypted(column_path) { + use crate::encryption::encrypt::encrypt_thrift_object; + let aad = create_module_aad( file_encryptor.file_aad(), module_type, @@ -679,9 +671,9 @@ impl MetadataObjectWriter { None, )?; let mut encryptor = file_encryptor.get_column_encryptor(column_path)?; - encrypt_object(object, &mut encryptor, &mut sink, &aad) + encrypt_thrift_object(object, &mut encryptor, &mut sink, &aad) } else { - Self::write_object(object, sink) + Self::write_thrift_object(object, sink) } } diff --git a/parquet/src/file/page_index/column_index.rs b/parquet/src/file/page_index/column_index.rs index 2d43c93b2e4b..a0893cc9eae9 100644 --- a/parquet/src/file/page_index/column_index.rs +++ b/parquet/src/file/page_index/column_index.rs @@ -22,7 +22,10 @@ use crate::{ data_type::{ByteArray, FixedLenByteArray}, - errors::Result, + errors::{ParquetError, Result}, + parquet_thrift::{ + ElementType, FieldType, ThriftCompactOutputProtocol, WriteThrift, WriteThriftField, + }, }; use std::ops::Deref; @@ -92,18 +95,26 @@ pub struct PrimitiveColumnIndex { } impl PrimitiveColumnIndex { - pub(super) fn try_new(index: ThriftColumnIndex) -> Result { - let len = index.null_pages.len(); + pub(crate) fn try_new( + null_pages: Vec, + boundary_order: BoundaryOrder, + null_counts: Option>, + repetition_level_histograms: Option>, 
+ definition_level_histograms: Option>, + min_bytes: Vec<&[u8]>, + max_bytes: Vec<&[u8]>, + ) -> Result { + let len = null_pages.len(); let mut min_values = Vec::with_capacity(len); let mut max_values = Vec::with_capacity(len); - for (i, is_null) in index.null_pages.iter().enumerate().take(len) { + for (i, is_null) in null_pages.iter().enumerate().take(len) { if !is_null { - let min = index.min_values[i]; + let min = min_bytes[i]; min_values.push(T::try_from_le_slice(min)?); - let max = index.max_values[i]; + let max = max_bytes[i]; max_values.push(T::try_from_le_slice(max)?); } else { // need placeholders @@ -114,43 +125,26 @@ impl PrimitiveColumnIndex { Ok(Self { column_index: ColumnIndex { - null_pages: index.null_pages, - boundary_order: index.boundary_order, - null_counts: index.null_counts, - repetition_level_histograms: index.repetition_level_histograms, - definition_level_histograms: index.definition_level_histograms, + null_pages, + boundary_order, + null_counts, + repetition_level_histograms, + definition_level_histograms, }, min_values, max_values, }) } - pub(crate) fn to_thrift(&self) -> crate::format::ColumnIndex { - let min_values = self - .min_values - .iter() - .map(|x| x.as_bytes().to_vec()) - .collect::>(); - - let max_values = self - .max_values - .iter() - .map(|x| x.as_bytes().to_vec()) - .collect::>(); - - let null_counts = self.null_counts.clone(); - let repetition_level_histograms = self.repetition_level_histograms.clone(); - let definition_level_histograms = self.definition_level_histograms.clone(); - let null_pages = self.null_pages.clone(); - - crate::format::ColumnIndex::new( - null_pages, - min_values, - max_values, - self.boundary_order.into(), - null_counts, - repetition_level_histograms, - definition_level_histograms, + pub(super) fn try_from_thrift(index: ThriftColumnIndex) -> Result { + Self::try_new( + index.null_pages, + index.boundary_order, + index.null_counts, + index.repetition_level_histograms, + index.definition_level_histograms, + index.min_values, + index.max_values, ) } } @@ -229,6 +223,53 @@ impl Deref for PrimitiveColumnIndex { } } +impl WriteThrift for PrimitiveColumnIndex { + const ELEMENT_TYPE: ElementType = ElementType::Struct; + fn write_thrift( + &self, + writer: &mut ThriftCompactOutputProtocol, + ) -> Result<()> { + self.null_pages.write_thrift_field(writer, 1, 0)?; + + // need to handle min/max manually + let len = self.null_pages.len(); + writer.write_field_begin(FieldType::List, 2, 1)?; + writer.write_list_begin(ElementType::Binary, len)?; + for i in 0..len { + let min = self.min_value(i).map(|m| m.as_bytes()).unwrap_or(&[]); + min.write_thrift(writer)?; + } + writer.write_field_begin(FieldType::List, 3, 2)?; + writer.write_list_begin(ElementType::Binary, len)?; + for i in 0..len { + let max = self.max_value(i).map(|m| m.as_bytes()).unwrap_or(&[]); + max.write_thrift(writer)?; + } + let mut last_field_id = self.boundary_order.write_thrift_field(writer, 4, 3)?; + if self.null_counts.is_some() { + last_field_id = + self.null_counts + .as_ref() + .unwrap() + .write_thrift_field(writer, 5, last_field_id)?; + } + if self.repetition_level_histograms.is_some() { + last_field_id = self + .repetition_level_histograms + .as_ref() + .unwrap() + .write_thrift_field(writer, 6, last_field_id)?; + } + if self.definition_level_histograms.is_some() { + self.definition_level_histograms + .as_ref() + .unwrap() + .write_thrift_field(writer, 7, last_field_id)?; + } + writer.write_struct_end() + } +} + /// Column index for byte arrays (fixed length and 
variable) #[derive(Debug, Clone, PartialEq)] pub struct ByteArrayColumnIndex { @@ -241,11 +282,19 @@ pub struct ByteArrayColumnIndex { } impl ByteArrayColumnIndex { - pub(super) fn try_new(index: ThriftColumnIndex) -> Result { - let len = index.null_pages.len(); - - let min_len = index.min_values.iter().map(|&v| v.len()).sum(); - let max_len = index.max_values.iter().map(|&v| v.len()).sum(); + pub(crate) fn try_new( + null_pages: Vec, + boundary_order: BoundaryOrder, + null_counts: Option>, + repetition_level_histograms: Option>, + definition_level_histograms: Option>, + min_values: Vec<&[u8]>, + max_values: Vec<&[u8]>, + ) -> Result { + let len = null_pages.len(); + + let min_len = min_values.iter().map(|&v| v.len()).sum(); + let max_len = max_values.iter().map(|&v| v.len()).sum(); let mut min_bytes = vec![0u8; min_len]; let mut max_bytes = vec![0u8; max_len]; @@ -255,15 +304,15 @@ impl ByteArrayColumnIndex { let mut min_pos = 0; let mut max_pos = 0; - for (i, is_null) in index.null_pages.iter().enumerate().take(len) { + for (i, is_null) in null_pages.iter().enumerate().take(len) { if !is_null { - let min = index.min_values[i]; + let min = min_values[i]; let dst = &mut min_bytes[min_pos..min_pos + min.len()]; dst.copy_from_slice(min); min_offsets[i] = min_pos; min_pos += min.len(); - let max = index.max_values[i]; + let max = max_values[i]; let dst = &mut max_bytes[max_pos..max_pos + max.len()]; dst.copy_from_slice(max); max_offsets[i] = max_pos; @@ -279,13 +328,12 @@ impl ByteArrayColumnIndex { Ok(Self { column_index: ColumnIndex { - null_pages: index.null_pages, - boundary_order: index.boundary_order, - null_counts: index.null_counts, - repetition_level_histograms: index.repetition_level_histograms, - definition_level_histograms: index.definition_level_histograms, + null_pages, + boundary_order, + null_counts, + repetition_level_histograms, + definition_level_histograms, }, - min_bytes, min_offsets, max_bytes, @@ -293,6 +341,18 @@ impl ByteArrayColumnIndex { }) } + pub(super) fn try_from_thrift(index: ThriftColumnIndex) -> Result { + Self::try_new( + index.null_pages, + index.boundary_order, + index.null_counts, + index.repetition_level_histograms, + index.definition_level_histograms, + index.min_values, + index.max_values, + ) + } + /// Returns the min value for the page indexed by `idx` /// /// It is `None` when all values are null @@ -344,33 +404,6 @@ impl ByteArrayColumnIndex { } }) } - - pub(crate) fn to_thrift(&self) -> crate::format::ColumnIndex { - let mut min_values = Vec::with_capacity(self.num_pages() as usize); - for i in 0..self.num_pages() as usize { - min_values.push(self.min_value(i).unwrap_or(&[]).to_owned()); - } - - let mut max_values = Vec::with_capacity(self.num_pages() as usize); - for i in 0..self.num_pages() as usize { - max_values.push(self.max_value(i).unwrap_or(&[]).to_owned()); - } - - let null_counts = self.null_counts.clone(); - let repetition_level_histograms = self.repetition_level_histograms.clone(); - let definition_level_histograms = self.definition_level_histograms.clone(); - let null_pages = self.null_pages.clone(); - - crate::format::ColumnIndex::new( - null_pages, - min_values, - max_values, - self.boundary_order.into(), - null_counts, - repetition_level_histograms, - definition_level_histograms, - ) - } } impl Deref for ByteArrayColumnIndex { @@ -381,6 +414,53 @@ impl Deref for ByteArrayColumnIndex { } } +impl WriteThrift for ByteArrayColumnIndex { + const ELEMENT_TYPE: ElementType = ElementType::Struct; + fn write_thrift( + &self, + writer: &mut 
ThriftCompactOutputProtocol, + ) -> Result<()> { + self.null_pages.write_thrift_field(writer, 1, 0)?; + + // need to handle min/max manually + let len = self.null_pages.len(); + writer.write_field_begin(FieldType::List, 2, 1)?; + writer.write_list_begin(ElementType::Binary, len)?; + for i in 0..len { + let min = self.min_value(i).unwrap_or(&[]); + min.write_thrift(writer)?; + } + writer.write_field_begin(FieldType::List, 3, 2)?; + writer.write_list_begin(ElementType::Binary, len)?; + for i in 0..len { + let max = self.max_value(i).unwrap_or(&[]); + max.write_thrift(writer)?; + } + let mut last_field_id = self.boundary_order.write_thrift_field(writer, 4, 3)?; + if self.null_counts.is_some() { + last_field_id = + self.null_counts + .as_ref() + .unwrap() + .write_thrift_field(writer, 5, last_field_id)?; + } + if self.repetition_level_histograms.is_some() { + last_field_id = self + .repetition_level_histograms + .as_ref() + .unwrap() + .write_thrift_field(writer, 6, last_field_id)?; + } + if self.definition_level_histograms.is_some() { + self.definition_level_histograms + .as_ref() + .unwrap() + .write_thrift_field(writer, 7, last_field_id)?; + } + writer.write_struct_end() + } +} + // Macro to generate getter functions for ColumnIndexMetaData. macro_rules! colidx_enum_func { ($self:ident, $func:ident, $arg:ident) => {{ @@ -567,3 +647,99 @@ column_index_iters!(ByteArray, BYTE_ARRAY, |v| v .map(|v| ByteArray::from(v.to_owned()))); column_index_iters!(FixedLenByteArray, FIXED_LEN_BYTE_ARRAY, |v| v .map(|v| FixedLenByteArray::from(v.to_owned()))); + +impl WriteThrift for ColumnIndexMetaData { + const ELEMENT_TYPE: ElementType = ElementType::Struct; + + fn write_thrift( + &self, + writer: &mut ThriftCompactOutputProtocol, + ) -> Result<()> { + match self { + ColumnIndexMetaData::BOOLEAN(index) => index.write_thrift(writer), + ColumnIndexMetaData::INT32(index) => index.write_thrift(writer), + ColumnIndexMetaData::INT64(index) => index.write_thrift(writer), + ColumnIndexMetaData::INT96(index) => index.write_thrift(writer), + ColumnIndexMetaData::FLOAT(index) => index.write_thrift(writer), + ColumnIndexMetaData::DOUBLE(index) => index.write_thrift(writer), + ColumnIndexMetaData::BYTE_ARRAY(index) => index.write_thrift(writer), + ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(index) => index.write_thrift(writer), + _ => Err(general_err!("Cannot serialize NONE index")), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_page_index_min_max_null() { + let column_index = PrimitiveColumnIndex { + column_index: ColumnIndex { + null_pages: vec![false], + boundary_order: BoundaryOrder::ASCENDING, + null_counts: Some(vec![0]), + repetition_level_histograms: Some(vec![1, 2]), + definition_level_histograms: Some(vec![1, 2, 3]), + }, + min_values: vec![-123], + max_values: vec![234], + }; + + assert_eq!(column_index.min_value(0), Some(&-123)); + assert_eq!(column_index.max_value(0), Some(&234)); + assert_eq!(column_index.null_count(0), Some(0)); + assert_eq!(column_index.repetition_level_histogram(0).unwrap(), &[1, 2]); + assert_eq!( + column_index.definition_level_histogram(0).unwrap(), + &[1, 2, 3] + ); + } + + #[test] + fn test_page_index_min_max_null_none() { + let column_index: PrimitiveColumnIndex = PrimitiveColumnIndex:: { + column_index: ColumnIndex { + null_pages: vec![true], + boundary_order: BoundaryOrder::ASCENDING, + null_counts: Some(vec![1]), + repetition_level_histograms: None, + definition_level_histograms: Some(vec![1, 0]), + }, + min_values: vec![Default::default()], + 
max_values: vec![Default::default()], + }; + + assert_eq!(column_index.min_value(0), None); + assert_eq!(column_index.max_value(0), None); + assert_eq!(column_index.null_count(0), Some(1)); + assert_eq!(column_index.repetition_level_histogram(0), None); + assert_eq!(column_index.definition_level_histogram(0).unwrap(), &[1, 0]); + } + + #[test] + fn test_invalid_column_index() { + let column_index = ThriftColumnIndex { + null_pages: vec![true, false], + min_values: vec![ + &[], + &[], // this shouldn't be empty as null_pages[1] is false + ], + max_values: vec![ + &[], + &[], // this shouldn't be empty as null_pages[1] is false + ], + null_counts: None, + repetition_level_histograms: None, + definition_level_histograms: None, + boundary_order: BoundaryOrder::UNORDERED, + }; + + let err = PrimitiveColumnIndex::::try_from_thrift(column_index).unwrap_err(); + assert_eq!( + err.to_string(), + "Parquet error: error converting value, expected 4 bytes got 0" + ); + } +} diff --git a/parquet/src/file/page_index/index.rs b/parquet/src/file/page_index/index.rs deleted file mode 100644 index 861dc0c3b04e..000000000000 --- a/parquet/src/file/page_index/index.rs +++ /dev/null @@ -1,455 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! [`Index`] structures holding decoded [`ColumnIndex`] information -//! -//! [`ColumnIndex`]: crate::format::ColumnIndex - -use crate::basic::{BoundaryOrder, Type}; -use crate::data_type::private::ParquetValueType; -use crate::data_type::{AsBytes, ByteArray, FixedLenByteArray, Int96}; -use crate::errors::ParquetError; -use crate::file::metadata::LevelHistogram; -use crate::file::page_index::index_reader::ThriftColumnIndex; -use std::fmt::Debug; - -/// Typed statistics for one data page -/// -/// See [`NativeIndex`] for more details -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct PageIndex { - /// The minimum value, It is None when all values are null - pub min: Option, - /// The maximum value, It is None when all values are null - pub max: Option, - /// Null values in the page - pub null_count: Option, - /// Repetition level histogram for the page - /// - /// `repetition_level_histogram[i]` is a count of how many values are at repetition level `i`. - /// For example, `repetition_level_histogram[0]` indicates how many rows the page contains. - pub repetition_level_histogram: Option, - /// Definition level histogram for the page - /// - /// `definition_level_histogram[i]` is a count of how many values are at definition level `i`. - /// For example, `definition_level_histogram[max_definition_level]` indicates how many - /// non-null values are present in the page. 
- pub definition_level_histogram: Option, -} - -impl PageIndex { - /// Returns the minimum value in the page - /// - /// It is `None` when all values are null - pub fn min(&self) -> Option<&T> { - self.min.as_ref() - } - - /// Returns the maximum value in the page - /// - /// It is `None` when all values are null - pub fn max(&self) -> Option<&T> { - self.max.as_ref() - } - - /// Returns the number of null values in the page - pub fn null_count(&self) -> Option { - self.null_count - } - - /// Returns the repetition level histogram for the page - pub fn repetition_level_histogram(&self) -> Option<&LevelHistogram> { - self.repetition_level_histogram.as_ref() - } - - /// Returns the definition level histogram for the page - pub fn definition_level_histogram(&self) -> Option<&LevelHistogram> { - self.definition_level_histogram.as_ref() - } - - /// Returns whether this is an all null page - pub fn is_null_page(&self) -> bool { - self.min.is_none() - } -} - -impl PageIndex -where - T: AsBytes, -{ - /// Returns the minimum value in the page as bytes - /// - /// It is `None` when all values are null - pub fn max_bytes(&self) -> Option<&[u8]> { - self.max.as_ref().map(|x| x.as_bytes()) - } - - /// Returns the maximum value in the page as bytes - /// - /// It is `None` when all values are null - pub fn min_bytes(&self) -> Option<&[u8]> { - self.min.as_ref().map(|x| x.as_bytes()) - } -} - -#[derive(Debug, Clone, PartialEq)] -#[allow(non_camel_case_types)] -/// Statistics for data pages in a column chunk. -/// -/// See [`NativeIndex`] for more information -pub enum Index { - /// Sometimes reading page index from parquet file - /// will only return pageLocations without min_max index, - /// `NONE` represents this lack of index information - NONE, - /// Boolean type index - BOOLEAN(NativeIndex), - /// 32-bit integer type index - INT32(NativeIndex), - /// 64-bit integer type index - INT64(NativeIndex), - /// 96-bit integer type (timestamp) index - INT96(NativeIndex), - /// 32-bit floating point type index - FLOAT(NativeIndex), - /// 64-bit floating point type index - DOUBLE(NativeIndex), - /// Byte array type index - BYTE_ARRAY(NativeIndex), - /// Fixed length byte array type index - FIXED_LEN_BYTE_ARRAY(NativeIndex), -} - -impl Index { - /// Return min/max elements inside ColumnIndex are ordered or not. - pub fn is_sorted(&self) -> bool { - // 0:UNORDERED, 1:ASCENDING ,2:DESCENDING, - if let Some(order) = self.get_boundary_order() { - order != BoundaryOrder::UNORDERED - } else { - false - } - } - - /// Get boundary_order of this page index. - pub fn get_boundary_order(&self) -> Option { - match self { - Index::NONE => None, - Index::BOOLEAN(index) => Some(index.boundary_order), - Index::INT32(index) => Some(index.boundary_order), - Index::INT64(index) => Some(index.boundary_order), - Index::INT96(index) => Some(index.boundary_order), - Index::FLOAT(index) => Some(index.boundary_order), - Index::DOUBLE(index) => Some(index.boundary_order), - Index::BYTE_ARRAY(index) => Some(index.boundary_order), - Index::FIXED_LEN_BYTE_ARRAY(index) => Some(index.boundary_order), - } - } -} - -/// Strongly typed statistics for data pages in a column chunk. -/// -/// This structure is a natively typed, in memory representation of the -/// [`ColumnIndex`] structure in a parquet file footer, as described in the -/// Parquet [PageIndex documentation]. The statistics stored in this structure -/// can be used by query engines to skip decoding pages while reading parquet -/// data. 
-/// -/// # Differences with Row Group Level Statistics -/// -/// One significant difference between `NativeIndex` and row group level -/// [`Statistics`] is that page level statistics may not store actual column -/// values as min and max (e.g. they may store truncated strings to save space) -/// -/// [PageIndex documentation]: https://github.com/apache/parquet-format/blob/master/PageIndex.md -/// [`Statistics`]: crate::file::statistics::Statistics -/// [`ColumnIndex`]: crate::format::ColumnIndex -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct NativeIndex { - /// The actual column indexes, one item per page - pub indexes: Vec>, - /// If the min/max elements are ordered, and if so in which - /// direction. See [source] for details. - /// - /// [source]: https://github.com/apache/parquet-format/blob/bfc549b93e6927cb1fc425466e4084f76edc6d22/src/main/thrift/parquet.thrift#L959-L964 - pub boundary_order: BoundaryOrder, -} - -impl NativeIndex { - /// The physical data type of the column - pub const PHYSICAL_TYPE: Type = T::PHYSICAL_TYPE; - - /// Creates a new [`NativeIndex`] - #[allow(dead_code)] - pub(crate) fn try_new(index: crate::format::ColumnIndex) -> Result { - let len = index.min_values.len(); - - let null_counts = index - .null_counts - .map(|x| x.into_iter().map(Some).collect::>()) - .unwrap_or_else(|| vec![None; len]); - - // histograms are a 1D array encoding a 2D num_pages X num_levels matrix. - let to_page_histograms = |opt_hist: Option>| { - if let Some(hist) = opt_hist { - // TODO: should we assert (hist.len() % len) == 0? - let num_levels = hist.len() / len; - let mut res = Vec::with_capacity(len); - for i in 0..len { - let page_idx = i * num_levels; - let page_hist = hist[page_idx..page_idx + num_levels].to_vec(); - res.push(Some(LevelHistogram::from(page_hist))); - } - res - } else { - vec![None; len] - } - }; - - let rep_hists: Vec> = - to_page_histograms(index.repetition_level_histograms); - let def_hists: Vec> = - to_page_histograms(index.definition_level_histograms); - - let indexes = index - .min_values - .iter() - .zip(index.max_values.iter()) - .zip(index.null_pages.into_iter()) - .zip(null_counts.into_iter()) - .zip(rep_hists.into_iter()) - .zip(def_hists.into_iter()) - .map( - |( - ((((min, max), is_null), null_count), repetition_level_histogram), - definition_level_histogram, - )| { - let (min, max) = if is_null { - (None, None) - } else { - ( - Some(T::try_from_le_slice(min)?), - Some(T::try_from_le_slice(max)?), - ) - }; - Ok(PageIndex { - min, - max, - null_count, - repetition_level_histogram, - definition_level_histogram, - }) - }, - ) - .collect::, ParquetError>>()?; - - let boundary_order = index.boundary_order.try_into()?; - Ok(Self { - indexes, - boundary_order, - }) - } - - pub(crate) fn to_thrift(&self) -> crate::format::ColumnIndex { - let min_values = self - .indexes - .iter() - .map(|x| x.min_bytes().unwrap_or(&[]).to_vec()) - .collect::>(); - - let max_values = self - .indexes - .iter() - .map(|x| x.max_bytes().unwrap_or(&[]).to_vec()) - .collect::>(); - - let null_counts = self - .indexes - .iter() - .map(|x| x.null_count()) - .collect::>>(); - - // Concatenate page histograms into a single Option - let repetition_level_histograms = self - .indexes - .iter() - .map(|x| x.repetition_level_histogram().map(|v| v.values())) - .collect::>>() - .map(|hists| hists.concat()); - - let definition_level_histograms = self - .indexes - .iter() - .map(|x| x.definition_level_histogram().map(|v| v.values())) - .collect::>>() - .map(|hists| hists.concat()); 
- - crate::format::ColumnIndex::new( - self.indexes.iter().map(|x| x.min().is_none()).collect(), - min_values, - max_values, - self.boundary_order.into(), - null_counts, - repetition_level_histograms, - definition_level_histograms, - ) - } - - /// Creates a new [`NativeIndex`] - #[allow(dead_code)] - pub(super) fn try_new_local(index: ThriftColumnIndex) -> Result { - let len = index.min_values.len(); - - // turn Option> into Vec> - let null_counts = index - .null_counts - .map(|x| x.into_iter().map(Some).collect::>()) - .unwrap_or_else(|| vec![None; len]); - - // histograms are a 1D array encoding a 2D num_pages X num_levels matrix. - let to_page_histograms = |opt_hist: Option>| { - if let Some(hist) = opt_hist { - // TODO: should we assert (hist.len() % len) == 0? - let num_levels = hist.len() / len; - let mut res = Vec::with_capacity(len); - for i in 0..len { - let page_idx = i * num_levels; - let page_hist = hist[page_idx..page_idx + num_levels].to_vec(); - res.push(Some(LevelHistogram::from(page_hist))); - } - res - } else { - vec![None; len] - } - }; - - // turn Option> into Vec> - let rep_hists: Vec> = - to_page_histograms(index.repetition_level_histograms); - let def_hists: Vec> = - to_page_histograms(index.definition_level_histograms); - - // start assembling Vec - let mut indexes: Vec> = Vec::with_capacity(len); - let mut rep_iter = rep_hists.into_iter(); - let mut def_iter = def_hists.into_iter(); - - // this used to zip together the other iters, but that was quite a bit - // slower than this approach. - for (i, null_count) in null_counts.into_iter().enumerate().take(len) { - let is_null = index.null_pages[i]; - let min = if is_null { - None - } else { - Some(T::try_from_le_slice(index.min_values[i])?) - }; - let max = if is_null { - None - } else { - Some(T::try_from_le_slice(index.max_values[i])?) 
- }; - - indexes.push(PageIndex { - min, - max, - null_count, - repetition_level_histogram: rep_iter.next().unwrap_or(None), - definition_level_histogram: def_iter.next().unwrap_or(None), - }) - } - - let boundary_order = index.boundary_order; - Ok(Self { - indexes, - boundary_order, - }) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_page_index_min_max_null() { - let page_index = PageIndex { - min: Some(-123), - max: Some(234), - null_count: Some(0), - repetition_level_histogram: Some(LevelHistogram::from(vec![1, 2])), - definition_level_histogram: Some(LevelHistogram::from(vec![1, 2, 3])), - }; - - assert_eq!(page_index.min().unwrap(), &-123); - assert_eq!(page_index.max().unwrap(), &234); - assert_eq!(page_index.min_bytes().unwrap(), (-123).as_bytes()); - assert_eq!(page_index.max_bytes().unwrap(), 234.as_bytes()); - assert_eq!(page_index.null_count().unwrap(), 0); - assert_eq!( - page_index.repetition_level_histogram().unwrap().values(), - &vec![1, 2] - ); - assert_eq!( - page_index.definition_level_histogram().unwrap().values(), - &vec![1, 2, 3] - ); - } - - #[test] - fn test_page_index_min_max_null_none() { - let page_index: PageIndex = PageIndex { - min: None, - max: None, - null_count: None, - repetition_level_histogram: None, - definition_level_histogram: None, - }; - - assert_eq!(page_index.min(), None); - assert_eq!(page_index.max(), None); - assert_eq!(page_index.min_bytes(), None); - assert_eq!(page_index.max_bytes(), None); - assert_eq!(page_index.null_count(), None); - assert_eq!(page_index.repetition_level_histogram(), None); - assert_eq!(page_index.definition_level_histogram(), None); - } - - #[test] - fn test_invalid_column_index() { - let column_index = crate::format::ColumnIndex { - null_pages: vec![true, false], - min_values: vec![ - vec![], - vec![], // this shouldn't be empty as null_pages[1] is false - ], - max_values: vec![ - vec![], - vec![], // this shouldn't be empty as null_pages[1] is false - ], - null_counts: None, - repetition_level_histograms: None, - definition_level_histograms: None, - boundary_order: crate::format::BoundaryOrder::UNORDERED, - }; - - let err = NativeIndex::::try_new(column_index).unwrap_err(); - assert_eq!( - err.to_string(), - "Parquet error: error converting value, expected 4 bytes got 0" - ); - } -} diff --git a/parquet/src/file/page_index/index_reader.rs b/parquet/src/file/page_index/index_reader.rs index 3db597954e6c..fbf97ad92cce 100644 --- a/parquet/src/file/page_index/index_reader.rs +++ b/parquet/src/file/page_index/index_reader.rs @@ -171,16 +171,28 @@ pub(crate) fn decode_column_index( let index = match column_type { Type::BOOLEAN => { - ColumnIndexMetaData::BOOLEAN(PrimitiveColumnIndex::::try_new(index)?) + ColumnIndexMetaData::BOOLEAN(PrimitiveColumnIndex::::try_from_thrift(index)?) + } + Type::INT32 => { + ColumnIndexMetaData::INT32(PrimitiveColumnIndex::::try_from_thrift(index)?) + } + Type::INT64 => { + ColumnIndexMetaData::INT64(PrimitiveColumnIndex::::try_from_thrift(index)?) + } + Type::INT96 => { + ColumnIndexMetaData::INT96(PrimitiveColumnIndex::::try_from_thrift(index)?) + } + Type::FLOAT => { + ColumnIndexMetaData::FLOAT(PrimitiveColumnIndex::::try_from_thrift(index)?) + } + Type::DOUBLE => { + ColumnIndexMetaData::DOUBLE(PrimitiveColumnIndex::::try_from_thrift(index)?) + } + Type::BYTE_ARRAY => { + ColumnIndexMetaData::BYTE_ARRAY(ByteArrayColumnIndex::try_from_thrift(index)?) 
} - Type::INT32 => ColumnIndexMetaData::INT32(PrimitiveColumnIndex::::try_new(index)?), - Type::INT64 => ColumnIndexMetaData::INT64(PrimitiveColumnIndex::::try_new(index)?), - Type::INT96 => ColumnIndexMetaData::INT96(PrimitiveColumnIndex::::try_new(index)?), - Type::FLOAT => ColumnIndexMetaData::FLOAT(PrimitiveColumnIndex::::try_new(index)?), - Type::DOUBLE => ColumnIndexMetaData::DOUBLE(PrimitiveColumnIndex::::try_new(index)?), - Type::BYTE_ARRAY => ColumnIndexMetaData::BYTE_ARRAY(ByteArrayColumnIndex::try_new(index)?), Type::FIXED_LEN_BYTE_ARRAY => { - ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(ByteArrayColumnIndex::try_new(index)?) + ColumnIndexMetaData::FIXED_LEN_BYTE_ARRAY(ByteArrayColumnIndex::try_from_thrift(index)?) } }; diff --git a/parquet/src/file/page_index/mod.rs b/parquet/src/file/page_index/mod.rs index ff70e2eca5dd..71b8290d5d36 100644 --- a/parquet/src/file/page_index/mod.rs +++ b/parquet/src/file/page_index/mod.rs @@ -20,6 +20,5 @@ //! [Column Index]: https://github.com/apache/parquet-format/blob/master/PageIndex.md pub mod column_index; -pub mod index; pub mod index_reader; pub mod offset_index; diff --git a/parquet/src/file/page_index/offset_index.rs b/parquet/src/file/page_index/offset_index.rs index 2153b8ed3009..30b58ce0acb3 100644 --- a/parquet/src/file/page_index/offset_index.rs +++ b/parquet/src/file/page_index/offset_index.rs @@ -102,14 +102,6 @@ impl OffsetIndexMetaData { self.unencoded_byte_array_data_bytes.as_ref() } - pub(crate) fn to_thrift(&self) -> crate::format::OffsetIndex { - let page_locations = self.page_locations.iter().map(|loc| loc.into()).collect(); - crate::format::OffsetIndex::new( - page_locations, - self.unencoded_byte_array_data_bytes.clone(), - ) - } - // Fast-path read of offset index. This works because we expect all field deltas to be 1, // and there's no nesting beyond PageLocation, so no need to save the last field id. Like // read_page_locations(), this will fail if absolute field id's are used. diff --git a/parquet/src/file/properties.rs b/parquet/src/file/properties.rs index b6003dc4d9dc..a76db6465602 100644 --- a/parquet/src/file/properties.rs +++ b/parquet/src/file/properties.rs @@ -639,7 +639,7 @@ impl WriterPropertiesBuilder { /// * If `Some`, must be greater than 0, otherwise will panic /// * If `None`, there's no effective limit. /// - /// [`Index`]: crate::file::page_index::index::Index + /// [`Index`]: crate::file::page_index::column_index::ColumnIndexMetaData pub fn set_column_index_truncate_length(mut self, max_length: Option) -> Self { if let Some(value) = max_length { assert!(value > 0, "Cannot have a 0 column index truncate length. 
If you wish to disable min/max value truncation, set it to `None`."); diff --git a/parquet/src/file/statistics.rs b/parquet/src/file/statistics.rs index e51f445b7e7e..38c0d1ff06a0 100644 --- a/parquet/src/file/statistics.rs +++ b/parquet/src/file/statistics.rs @@ -518,15 +518,14 @@ pub(crate) fn page_stats_to_thrift(stats: Option<&Statistics>) -> Option = Box< &'a mut TrackedWrite<W>, RowGroupMetaData, Vec<Option<Sbbf>>, - Vec<Option<Index>>, + Vec<Option<ColumnIndexMetaData>>, Vec<Option<OffsetIndexMetaData>>, ) -> Result<()> + 'a @@ -160,7 +160,7 @@ pub struct SerializedFileWriter { props: WriterPropertiesPtr, row_groups: Vec<RowGroupMetaData>, bloom_filters: Vec<Vec<Option<Sbbf>>>, - column_indexes: Vec<Vec<Option<Index>>>, + column_indexes: Vec<Vec<Option<ColumnIndexMetaData>>>, offset_indexes: Vec<Vec<Option<OffsetIndexMetaData>>>, row_group_index: usize, // kv_metadatas will be appended to `props` when `write_metadata` @@ -347,9 +347,6 @@ impl SerializedFileWriter { .map(|v| v.to_thrift()) .collect::<Vec<_>>(); - let column_indexes = self.convert_column_indexes(); - let offset_indexes = self.convert_offset_index(); - let mut encoder = ThriftMetadataWriter::new( &mut self.buf, &self.schema, @@ -368,45 +365,11 @@ impl SerializedFileWriter { encoder = encoder.with_key_value_metadata(key_value_metadata) } - encoder = encoder.with_column_indexes(&column_indexes); - encoder = encoder.with_offset_indexes(&offset_indexes); + encoder = encoder.with_column_indexes(&self.column_indexes); + encoder = encoder.with_offset_indexes(&self.offset_indexes); encoder.finish() } - fn convert_column_indexes(&self) -> Vec<Vec<Option<crate::format::ColumnIndex>>> { - self.column_indexes - .iter() - .map(|cis| { - cis.iter() - .map(|ci| { - ci.as_ref().map(|column_index| match column_index { - Index::NONE => panic!("trying to serialize missing column index"), - Index::BOOLEAN(column_index) => column_index.to_thrift(), - Index::BYTE_ARRAY(column_index) => column_index.to_thrift(), - Index::DOUBLE(column_index) => column_index.to_thrift(), - Index::FIXED_LEN_BYTE_ARRAY(column_index) => column_index.to_thrift(), - Index::FLOAT(column_index) => column_index.to_thrift(), - Index::INT32(column_index) => column_index.to_thrift(), - Index::INT64(column_index) => column_index.to_thrift(), - Index::INT96(column_index) => column_index.to_thrift(), - }) - }) - .collect() - }) - .collect() - } - - fn convert_offset_index(&self) -> Vec<Vec<Option<crate::format::OffsetIndex>>> { - self.offset_indexes - .iter() - .map(|ois| { - ois.iter() - .map(|oi| oi.as_ref().map(|offset_index| offset_index.to_thrift())) - .collect() - }) - .collect() - } - #[inline] fn assert_previous_writer_closed(&self) -> Result<()> { if self.finished { @@ -546,7 +509,7 @@ pub struct SerializedRowGroupWriter<'a, W: Write> { row_group_metadata: Option<RowGroupMetaDataPtr>, column_chunks: Vec<ColumnChunkMetaData>, bloom_filters: Vec<Option<Sbbf>>, - column_indexes: Vec<Option<Index>>, + column_indexes: Vec<Option<ColumnIndexMetaData>>, offset_indexes: Vec<Option<OffsetIndexMetaData>>, row_group_index: i16, file_offset: i64, From aa26c0cfb1493ce112cf65144783bfef42bf8c94 Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Fri, 26 Sep 2025 09:23:33 -0700 Subject: [PATCH 13/15] [thrift-remodel] Use new writer to write Parquet file metadata (#8445) # Which issue does this PR close? **Note: this targets a feature branch, not main** - Part of #5854. # Rationale for this change This PR closes the loop, and now Parquet metadata is completely handled by the new code. # What changes are included in this PR? Changes the metadata builders to use the new structs rather than those from `format`. As a consequence, the `close` methods no longer return a `format::FileMetaData` but instead return a `ParquetMetaData`. # Are these changes tested? Covered by existing tests, but many tests were modified to deal with the switch to `ParquetMetaData` mentioned above.
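To make the switch concrete, here is a minimal sketch of the new `close` contract on this feature branch. The batch construction and in-memory buffer are illustrative assumptions, not code from this patch; the `ParquetMetaData` accessors shown are the ones exercised by the updated tests and benches below:

```rust
use std::sync::Arc;

use arrow::array::{ArrayRef, Int32Array, RecordBatch};
use parquet::arrow::ArrowWriter;

fn main() {
    // Hypothetical single-column batch used only for illustration.
    let batch = RecordBatch::try_from_iter(vec![(
        "a",
        Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef,
    )])
    .unwrap();

    let mut buf = Vec::new();
    let mut writer = ArrowWriter::try_new(&mut buf, batch.schema(), None).unwrap();
    writer.write(&batch).unwrap();

    // `close` now returns the in-memory `ParquetMetaData` rather than a
    // thrift `format::FileMetaData`, so callers use accessor methods
    // instead of reaching into raw struct fields.
    let metadata = writer.close().unwrap();
    assert_eq!(metadata.file_metadata().num_rows(), 3);
    for rg in metadata.row_groups() {
        for col in rg.columns() {
            // e.g. offsets formerly read from `col.meta_data`
            let _ = col.data_page_offset();
        }
    }
}
```

The same pattern, accessor methods on `ParquetMetaData`, `RowGroupMetaData`, and `ColumnChunkMetaData` in place of direct field access, recurs throughout the tests touched by this change.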
# Are there any user-facing changes? Yes --- parquet/benches/metadata.rs | 46 +-- parquet/src/arrow/arrow_reader/mod.rs | 3 +- parquet/src/arrow/arrow_writer/mod.rs | 87 ++--- parquet/src/arrow/async_writer/mod.rs | 6 +- parquet/src/column/writer/mod.rs | 13 +- parquet/src/encryption/encrypt.rs | 35 +- parquet/src/file/column_crypto_metadata.rs | 16 + parquet/src/file/metadata/memory.rs | 7 + parquet/src/file/metadata/mod.rs | 10 +- parquet/src/file/metadata/thrift_gen.rs | 373 +++++++++++++++++-- parquet/src/file/metadata/writer.rs | 328 ++++++++-------- parquet/src/file/serialized_reader.rs | 4 +- parquet/src/file/writer.rs | 116 +++--- parquet/src/schema/types.rs | 18 + parquet/tests/encryption/encryption.rs | 20 +- parquet/tests/encryption/encryption_async.rs | 10 +- 16 files changed, 711 insertions(+), 381 deletions(-) diff --git a/parquet/benches/metadata.rs b/parquet/benches/metadata.rs index ced0175da878..d05f1e09cb11 100644 --- a/parquet/benches/metadata.rs +++ b/parquet/benches/metadata.rs @@ -15,6 +15,8 @@ // specific language governing permissions and limitations // under the License. +#[cfg(feature = "arrow")] +use parquet::file::metadata::ParquetMetaData; use parquet::file::metadata::ParquetMetaDataReader; use rand::Rng; use thrift::protocol::TCompactOutputProtocol; @@ -164,7 +166,7 @@ fn get_footer_bytes(data: Bytes) -> Bytes { } #[cfg(feature = "arrow")] -fn rewrite_file(bytes: Bytes) -> (Bytes, FileMetaData) { +fn rewrite_file(bytes: Bytes) -> (Bytes, ParquetMetaData) { use arrow::array::RecordBatchReader; use parquet::arrow::{arrow_reader::ParquetRecordBatchReaderBuilder, ArrowWriter}; use parquet::file::properties::{EnabledStatistics, WriterProperties}; @@ -217,6 +219,7 @@ fn criterion_benchmark(c: &mut Criterion) { }) }); + // FIXME(ets): remove benches of private APIs c.bench_function("decode thrift file metadata", |b| { b.iter(|| { parquet::thrift::bench_file_metadata(&meta_data); @@ -237,45 +240,42 @@ fn criterion_benchmark(c: &mut Criterion) { }); // rewrite file with page statistics. then read page headers. 
+ // FIXME(ets): remove the page header benches when remodel is complete #[cfg(feature = "arrow")] let (file_bytes, metadata) = rewrite_file(data.clone()); #[cfg(feature = "arrow")] c.bench_function("page headers", |b| { b.iter(|| { - metadata.row_groups.iter().for_each(|rg| { - rg.columns.iter().for_each(|col| { - if let Some(col_meta) = &col.meta_data { - if let Some(dict_offset) = col_meta.dictionary_page_offset { - parquet::thrift::bench_page_header( - &file_bytes.slice(dict_offset as usize..), - ); - } + for rg in metadata.row_groups() { + for col in rg.columns() { + if let Some(dict_offset) = col.dictionary_page_offset() { parquet::thrift::bench_page_header( - &file_bytes.slice(col_meta.data_page_offset as usize..), + &file_bytes.slice(dict_offset as usize..), ); } - }); - }); + parquet::thrift::bench_page_header( + &file_bytes.slice(col.data_page_offset() as usize..), + ); + } + } }) }); #[cfg(feature = "arrow")] c.bench_function("page headers (no stats)", |b| { b.iter(|| { - metadata.row_groups.iter().for_each(|rg| { - rg.columns.iter().for_each(|col| { - if let Some(col_meta) = &col.meta_data { - if let Some(dict_offset) = col_meta.dictionary_page_offset { - parquet::thrift::bench_page_header_no_stats( - &file_bytes.slice(dict_offset as usize..), - ); - } + for rg in metadata.row_groups() { + for col in rg.columns() { + if let Some(dict_offset) = col.dictionary_page_offset() { parquet::thrift::bench_page_header_no_stats( - &file_bytes.slice(col_meta.data_page_offset as usize..), + &file_bytes.slice(dict_offset as usize..), ); } - }); - }); + parquet::thrift::bench_page_header_no_stats( + &file_bytes.slice(col.data_page_offset() as usize..), + ); + } + } }) }); } diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index 00beaa364f16..8d5b3b55c183 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -1185,6 +1185,7 @@ mod tests { FloatType, Int32Type, Int64Type, Int96, Int96Type, }; use crate::errors::Result; + use crate::file::metadata::ParquetMetaData; use crate::file::properties::{EnabledStatistics, WriterProperties, WriterVersion}; use crate::file::writer::SerializedFileWriter; use crate::schema::parser::parse_message_type; @@ -2913,7 +2914,7 @@ mod tests { schema: TypePtr, field: Option, opts: &TestOptions, - ) -> Result { + ) -> Result { let mut writer_props = opts.writer_props(); if let Some(field) = field { let arrow_schema = Schema::new(vec![field]); diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs index 4fae03affa17..6b4dc87abba4 100644 --- a/parquet/src/arrow/arrow_writer/mod.rs +++ b/parquet/src/arrow/arrow_writer/mod.rs @@ -43,7 +43,7 @@ use crate::data_type::{ByteArray, FixedLenByteArray}; #[cfg(feature = "encryption")] use crate::encryption::encrypt::FileEncryptor; use crate::errors::{ParquetError, Result}; -use crate::file::metadata::{KeyValue, RowGroupMetaData}; +use crate::file::metadata::{KeyValue, ParquetMetaData, RowGroupMetaData}; use crate::file::properties::{WriterProperties, WriterPropertiesPtr}; use crate::file::reader::{ChunkReader, Length}; use crate::file::writer::{SerializedFileWriter, SerializedRowGroupWriter}; @@ -397,13 +397,13 @@ impl ArrowWriter { /// Unlike [`Self::close`] this does not consume self /// /// Attempting to write after calling finish will result in an error - pub fn finish(&mut self) -> Result { + pub fn finish(&mut self) -> Result { self.flush()?; self.writer.finish() } /// Close and finalize the underlying 
Parquet writer - pub fn close(mut self) -> Result { + pub fn close(mut self) -> Result { self.finish() } @@ -754,7 +754,7 @@ impl ArrowColumnChunk { /// row_group_writer.close().unwrap(); /// /// let metadata = writer.close().unwrap(); -/// assert_eq!(metadata.num_rows, 3); +/// assert_eq!(metadata.file_metadata().num_rows(), 3); /// ``` pub struct ArrowColumnWriter { writer: ArrowColumnWriterImpl, @@ -1510,7 +1510,6 @@ mod tests { use crate::arrow::ARROW_SCHEMA_META_KEY; use crate::column::page::{Page, PageReader}; use crate::file::metadata::thrift_gen::PageHeader; - use crate::file::page_encoding_stats::PageEncodingStats; use crate::file::page_index::column_index::ColumnIndexMetaData; use crate::file::reader::SerializedPageReader; use crate::parquet_thrift::{ReadThrift, ThriftSliceInputProtocol}; @@ -2579,12 +2578,12 @@ mod tests { ArrowWriter::try_new(&mut out, batch.schema(), None).expect("Unable to write file"); writer.write(&batch).unwrap(); let file_meta_data = writer.close().unwrap(); - for row_group in file_meta_data.row_groups { - for column in row_group.columns { - assert!(column.offset_index_offset.is_some()); - assert!(column.offset_index_length.is_some()); - assert!(column.column_index_offset.is_none()); - assert!(column.column_index_length.is_none()); + for row_group in file_meta_data.row_groups() { + for column in row_group.columns() { + assert!(column.offset_index_offset().is_some()); + assert!(column.offset_index_length().is_some()); + assert!(column.column_index_offset().is_none()); + assert!(column.column_index_length().is_none()); } } } @@ -3033,14 +3032,18 @@ mod tests { writer.write(&batch).unwrap(); let file_metadata = writer.close().unwrap(); + let schema = file_metadata.file_metadata().schema(); // Coerced name of "item" should be "element" - assert_eq!(file_metadata.schema[3].name, "element"); + let list_field = &schema.get_fields()[0].get_fields()[0]; + assert_eq!(list_field.get_fields()[0].name(), "element"); + + let map_field = &schema.get_fields()[1].get_fields()[0]; // Coerced name of "entries" should be "key_value" - assert_eq!(file_metadata.schema[5].name, "key_value"); + assert_eq!(map_field.name(), "key_value"); // Coerced name of "keys" should be "key" - assert_eq!(file_metadata.schema[6].name, "key"); + assert_eq!(map_field.get_fields()[0].name(), "key"); // Coerced name of "values" should be "value" - assert_eq!(file_metadata.schema[7].name, "value"); + assert_eq!(map_field.get_fields()[1].name(), "value"); // Double check schema after reading from the file let reader = SerializedFileReader::new(file).unwrap(); @@ -3984,15 +3987,15 @@ mod tests { writer.write(&batch).unwrap(); let metadata = writer.close().unwrap(); - assert_eq!(metadata.row_groups.len(), 1); - let row_group = &metadata.row_groups[0]; - assert_eq!(row_group.columns.len(), 2); + assert_eq!(metadata.num_row_groups(), 1); + let row_group = metadata.row_group(0); + assert_eq!(row_group.num_columns(), 2); // Column "a" has both offset and column index, as requested - assert!(row_group.columns[0].offset_index_offset.is_some()); - assert!(row_group.columns[0].column_index_offset.is_some()); + assert!(row_group.column(0).offset_index_offset().is_some()); + assert!(row_group.column(0).column_index_offset().is_some()); // Column "b" should only have offset index - assert!(row_group.columns[1].offset_index_offset.is_some()); - assert!(row_group.columns[1].column_index_offset.is_none()); + assert!(row_group.column(1).offset_index_offset().is_some()); + 
assert!(row_group.column(1).column_index_offset().is_none()); let options = ReadOptionsBuilder::new().with_page_index().build(); let reader = SerializedFileReader::new_with_options(Bytes::from(buf), options).unwrap(); @@ -4059,15 +4062,15 @@ mod tests { writer.write(&batch).unwrap(); let metadata = writer.close().unwrap(); - assert_eq!(metadata.row_groups.len(), 1); - let row_group = &metadata.row_groups[0]; - assert_eq!(row_group.columns.len(), 2); + assert_eq!(metadata.num_row_groups(), 1); + let row_group = metadata.row_group(0); + assert_eq!(row_group.num_columns(), 2); // Column "a" should only have offset index - assert!(row_group.columns[0].offset_index_offset.is_some()); - assert!(row_group.columns[0].column_index_offset.is_none()); + assert!(row_group.column(0).offset_index_offset().is_some()); + assert!(row_group.column(0).column_index_offset().is_none()); // Column "b" should only have offset index - assert!(row_group.columns[1].offset_index_offset.is_some()); - assert!(row_group.columns[1].column_index_offset.is_none()); + assert!(row_group.column(1).offset_index_offset().is_some()); + assert!(row_group.column(1).column_index_offset().is_none()); let options = ReadOptionsBuilder::new().with_page_index().build(); let reader = SerializedFileReader::new_with_options(Bytes::from(buf), options).unwrap(); @@ -4331,14 +4334,18 @@ mod tests { writer.write(&batch).unwrap(); let file_metadata = writer.close().unwrap(); - assert_eq!(file_metadata.row_groups.len(), 1); - assert_eq!(file_metadata.row_groups[0].columns.len(), 1); - let chunk_meta = file_metadata.row_groups[0].columns[0] - .meta_data - .as_ref() - .expect("column metadata missing"); - assert!(chunk_meta.encoding_stats.is_some()); - let chunk_page_stats = chunk_meta.encoding_stats.as_ref().unwrap(); + assert_eq!(file_metadata.num_row_groups(), 1); + assert_eq!(file_metadata.row_group(0).num_columns(), 1); + assert!(file_metadata + .row_group(0) + .column(0) + .page_encoding_stats() + .is_some()); + let chunk_page_stats = file_metadata + .row_group(0) + .column(0) + .page_encoding_stats() + .unwrap(); // check that the read metadata is also correct let options = ReadOptionsBuilder::new().with_page_index().build(); @@ -4349,11 +4356,7 @@ mod tests { let column = rowgroup.metadata().column(0); assert!(column.page_encoding_stats().is_some()); let file_page_stats = column.page_encoding_stats().unwrap(); - let chunk_stats: Vec = chunk_page_stats - .iter() - .map(|x| crate::file::page_encoding_stats::try_from_thrift(x).unwrap()) - .collect(); - assert_eq!(&chunk_stats, file_page_stats); + assert_eq!(chunk_page_stats, file_page_stats); } #[test] diff --git a/parquet/src/arrow/async_writer/mod.rs b/parquet/src/arrow/async_writer/mod.rs index e61b8f47c31f..232333a1b486 100644 --- a/parquet/src/arrow/async_writer/mod.rs +++ b/parquet/src/arrow/async_writer/mod.rs @@ -65,7 +65,7 @@ use crate::{ arrow::ArrowWriter, errors::{ParquetError, Result}, file::{ - metadata::{KeyValue, RowGroupMetaData}, + metadata::{KeyValue, ParquetMetaData, RowGroupMetaData}, properties::WriterProperties, }, }; @@ -247,7 +247,7 @@ impl AsyncArrowWriter { /// Unlike [`Self::close`] this does not consume self /// /// Attempting to write after calling finish will result in an error - pub async fn finish(&mut self) -> Result { + pub async fn finish(&mut self) -> Result { let metadata = self.sync_writer.finish()?; // Force to flush the remaining data. @@ -260,7 +260,7 @@ impl AsyncArrowWriter { /// Close and finalize the writer. 
/// /// All the data in the inner buffer will be force flushed. - pub async fn close(mut self) -> Result { + pub async fn close(mut self) -> Result { self.finish().await } diff --git a/parquet/src/column/writer/mod.rs b/parquet/src/column/writer/mod.rs index 3f516462f20c..ee400f200e4d 100644 --- a/parquet/src/column/writer/mod.rs +++ b/parquet/src/column/writer/mod.rs @@ -3602,19 +3602,12 @@ mod tests { col_writer.close().unwrap(); row_group_writer.close().unwrap(); let file_metadata = writer.close().unwrap(); - assert!(file_metadata.row_groups[0].columns[0].meta_data.is_some()); - let stats = file_metadata.row_groups[0].columns[0] - .meta_data - .as_ref() - .unwrap() - .statistics - .as_ref() - .unwrap(); - assert!(!stats.is_max_value_exact.unwrap()); + let stats = file_metadata.row_group(0).column(0).statistics().unwrap(); + assert!(!stats.max_is_exact()); // Truncation of invalid UTF-8 should fall back to binary truncation, so last byte should // be incremented by 1. assert_eq!( - stats.max_value, + stats.max_bytes_opt().map(|v| v.to_vec()), Some([128, 128, 128, 128, 128, 128, 128, 129].to_vec()) ); } diff --git a/parquet/src/encryption/encrypt.rs b/parquet/src/encryption/encrypt.rs index 97893021699e..1a22abff56fa 100644 --- a/parquet/src/encryption/encrypt.rs +++ b/parquet/src/encryption/encrypt.rs @@ -24,11 +24,9 @@ use crate::errors::{ParquetError, Result}; use crate::file::column_crypto_metadata::{ColumnCryptoMetaData, EncryptionWithColumnKey}; use crate::parquet_thrift::{ThriftCompactOutputProtocol, WriteThrift}; use crate::schema::types::{ColumnDescPtr, SchemaDescriptor}; -use crate::thrift::TSerializable; use ring::rand::{SecureRandom, SystemRandom}; use std::collections::{HashMap, HashSet}; use std::io::Write; -use thrift::protocol::TCompactOutputProtocol; #[derive(Debug, Clone, PartialEq)] struct EncryptionKey { @@ -365,18 +363,6 @@ impl FileEncryptor { } } -/// Write an encrypted Thrift serializable object -pub(crate) fn encrypt_object( - object: &T, - encryptor: &mut Box, - sink: &mut W, - module_aad: &[u8], -) -> Result<()> { - let encrypted_buffer = encrypt_object_to_vec(object, encryptor, module_aad)?; - sink.write_all(&encrypted_buffer)?; - Ok(()) -} - /// Write an encrypted Thrift serializable object pub(crate) fn encrypt_thrift_object( object: &T, @@ -389,7 +375,7 @@ pub(crate) fn encrypt_thrift_object( Ok(()) } -pub(crate) fn write_signed_plaintext_object( +pub(crate) fn write_signed_plaintext_thrift_object( object: &T, encryptor: &mut Box, sink: &mut W, @@ -397,8 +383,8 @@ pub(crate) fn write_signed_plaintext_object( ) -> Result<()> { let mut buffer: Vec = vec![]; { - let mut protocol = TCompactOutputProtocol::new(&mut buffer); - object.write_to_out_protocol(&mut protocol)?; + let mut protocol = ThriftCompactOutputProtocol::new(&mut buffer); + object.write_thrift(&mut protocol)?; } sink.write_all(&buffer)?; buffer = encryptor.encrypt(buffer.as_ref(), module_aad)?; @@ -412,21 +398,6 @@ pub(crate) fn write_signed_plaintext_object( Ok(()) } -/// Encrypt a Thrift serializable object to a byte vector -pub(crate) fn encrypt_object_to_vec( - object: &T, - encryptor: &mut Box, - module_aad: &[u8], -) -> Result> { - let mut buffer: Vec = vec![]; - { - let mut unencrypted_protocol = TCompactOutputProtocol::new(&mut buffer); - object.write_to_out_protocol(&mut unencrypted_protocol)?; - } - - encryptor.encrypt(buffer.as_ref(), module_aad) -} - /// Encrypt a Thrift serializable object to a byte vector pub(crate) fn encrypt_thrift_object_to_vec( object: &T, diff --git 
a/parquet/src/file/column_crypto_metadata.rs b/parquet/src/file/column_crypto_metadata.rs index 6a538bd42bc0..429e7946dd67 100644 --- a/parquet/src/file/column_crypto_metadata.rs +++ b/parquet/src/file/column_crypto_metadata.rs @@ -20,6 +20,7 @@ use std::io::Write; use crate::errors::{ParquetError, Result}; +use crate::file::metadata::HeapSize; use crate::format::{ ColumnCryptoMetaData as TColumnCryptoMetaData, EncryptionWithColumnKey as TEncryptionWithColumnKey, @@ -45,6 +46,12 @@ pub struct EncryptionWithColumnKey { } ); +impl HeapSize for EncryptionWithColumnKey { + fn heap_size(&self) -> usize { + self.path_in_schema.heap_size() + self.key_metadata.heap_size() + } +} + thrift_union!( /// ColumnCryptoMetadata for a column chunk union ColumnCryptoMetaData { @@ -53,6 +60,15 @@ union ColumnCryptoMetaData { } ); +impl HeapSize for ColumnCryptoMetaData { + fn heap_size(&self) -> usize { + match self { + Self::ENCRYPTION_WITH_FOOTER_KEY => 0, + Self::ENCRYPTION_WITH_COLUMN_KEY(path) => path.heap_size(), + } + } +} + /// Converts Thrift definition into `ColumnCryptoMetadata`. pub fn try_from_thrift( thrift_column_crypto_metadata: &TColumnCryptoMetaData, diff --git a/parquet/src/file/metadata/memory.rs b/parquet/src/file/metadata/memory.rs index 19122a1b5522..bfe6b0255c5c 100644 --- a/parquet/src/file/metadata/memory.rs +++ b/parquet/src/file/metadata/memory.rs @@ -94,6 +94,12 @@ impl HeapSize for RowGroupMetaData { impl HeapSize for ColumnChunkMetaData { fn heap_size(&self) -> usize { + #[cfg(feature = "encryption")] + let encryption_heap_size = + self.column_crypto_metadata.heap_size() + self.encrypted_column_metadata.heap_size(); + #[cfg(not(feature = "encryption"))] + let encryption_heap_size = 0; + // don't count column_descr here because it is already counted in // FileMetaData self.encodings.heap_size() @@ -104,6 +110,7 @@ impl HeapSize for ColumnChunkMetaData { + self.unencoded_byte_array_data_bytes.heap_size() + self.repetition_level_histogram.heap_size() + self.definition_level_histogram.heap_size() + + encryption_heap_size } } diff --git a/parquet/src/file/metadata/mod.rs b/parquet/src/file/metadata/mod.rs index 94e289ae8134..22c2f8fb440b 100644 --- a/parquet/src/file/metadata/mod.rs +++ b/parquet/src/file/metadata/mod.rs @@ -848,6 +848,8 @@ pub struct ColumnChunkMetaData { definition_level_histogram: Option, #[cfg(feature = "encryption")] column_crypto_metadata: Option, + #[cfg(feature = "encryption")] + encrypted_column_metadata: Option>, } /// Histograms for repetition and definition levels. 
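For reference, a minimal sketch of the accounting pattern these `HeapSize` impls follow, using a stand-in trait and illustrative impls rather than the crate's own definitions:

```rust
// Stand-in for the HeapSize trait used above: report only owned heap bytes.
trait HeapSize {
    fn heap_size(&self) -> usize;
}

impl HeapSize for Vec<u8> {
    // A byte vector owns `capacity()` bytes on the heap.
    fn heap_size(&self) -> usize {
        self.capacity()
    }
}

impl<T: HeapSize> HeapSize for Option<T> {
    // An Option adds no heap of its own; it only forwards to its contents.
    fn heap_size(&self) -> usize {
        self.as_ref().map_or(0, |v| v.heap_size())
    }
}

fn main() {
    // Mirrors the new accounting above: an encrypted column metadata buffer
    // now counts toward the column chunk's reported heap size.
    let encrypted_column_metadata: Option<Vec<u8>> = Some(vec![0u8; 128]);
    assert!(encrypted_column_metadata.heap_size() >= 128);
}
```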
@@ -1232,6 +1234,8 @@ impl ColumnChunkMetaData { definition_level_histogram, #[cfg(feature = "encryption")] column_crypto_metadata, + #[cfg(feature = "encryption")] + encrypted_column_metadata: None, }; Ok(result) } @@ -1370,6 +1374,8 @@ impl ColumnChunkMetaDataBuilder { definition_level_histogram: None, #[cfg(feature = "encryption")] column_crypto_metadata: None, + #[cfg(feature = "encryption")] + encrypted_column_metadata: None, }) } @@ -2067,7 +2073,7 @@ mod tests { #[cfg(not(feature = "encryption"))] let base_expected_size = 2280; #[cfg(feature = "encryption")] - let base_expected_size = 2616; + let base_expected_size = 2712; assert_eq!(parquet_meta.memory_size(), base_expected_size); @@ -2107,7 +2113,7 @@ mod tests { #[cfg(not(feature = "encryption"))] let bigger_expected_size = 2704; #[cfg(feature = "encryption")] - let bigger_expected_size = 3040; + let bigger_expected_size = 3136; // more set fields means more memory usage assert!(bigger_expected_size > base_expected_size); diff --git a/parquet/src/file/metadata/thrift_gen.rs b/parquet/src/file/metadata/thrift_gen.rs index 7515a70a63f1..5665ad2ce9ca 100644 --- a/parquet/src/file/metadata/thrift_gen.rs +++ b/parquet/src/file/metadata/thrift_gen.rs @@ -38,7 +38,9 @@ use crate::{ read_thrift_vec, ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, ThriftCompactOutputProtocol, WriteThrift, WriteThriftField, }, - schema::types::{parquet_schema_from_array, ColumnDescriptor, SchemaDescriptor}, + schema::types::{ + num_nodes, parquet_schema_from_array, ColumnDescriptor, SchemaDescriptor, TypePtr, + }, thrift_struct, thrift_union, util::bit_util::FromBytes, }; @@ -68,12 +70,12 @@ pub(crate) struct SchemaElement<'a> { ); thrift_struct!( -pub(crate) struct AesGcmV1<'a> { +pub(crate) struct AesGcmV1 { /// AAD prefix - 1: optional binary<'a> aad_prefix + 1: optional binary aad_prefix /// Unique file identifier part of AAD suffix - 2: optional binary<'a> aad_file_unique + 2: optional binary aad_file_unique /// In files encrypted with AAD prefix without storing it, /// readers must supply the prefix @@ -82,12 +84,12 @@ pub(crate) struct AesGcmV1<'a> { ); thrift_struct!( -pub(crate) struct AesGcmCtrV1<'a> { +pub(crate) struct AesGcmCtrV1 { /// AAD prefix - 1: optional binary<'a> aad_prefix + 1: optional binary aad_prefix /// Unique file identifier part of AAD suffix - 2: optional binary<'a> aad_file_unique + 2: optional binary aad_file_unique /// In files encrypted with AAD prefix without storing it, /// readers must supply the prefix @@ -96,24 +98,24 @@ pub(crate) struct AesGcmCtrV1<'a> { ); thrift_union!( -union EncryptionAlgorithm<'a> { - 1: (AesGcmV1<'a>) AES_GCM_V1 - 2: (AesGcmCtrV1<'a>) AES_GCM_CTR_V1 +union EncryptionAlgorithm { + 1: (AesGcmV1) AES_GCM_V1 + 2: (AesGcmCtrV1) AES_GCM_CTR_V1 } ); #[cfg(feature = "encryption")] thrift_struct!( /// Crypto metadata for files with encrypted footer -pub(crate) struct FileCryptoMetaData<'a> { +pub(crate) struct FileCryptoMetaData { /// Encryption algorithm. This field is only used for files /// with encrypted footer. Files with plaintext footer store algorithm id /// inside footer (FileMetaData structure). 
- 1: required EncryptionAlgorithm<'a> encryption_algorithm + 1: required EncryptionAlgorithm encryption_algorithm /** Retrieval metadata of key used for encryption of footer, * and (possibly) columns **/ - 2: optional binary<'a> key_metadata + 2: optional binary key_metadata } ); @@ -135,8 +137,8 @@ struct FileMetaData<'a> { 5: optional list key_value_metadata 6: optional string created_by 7: optional list column_orders; - 8: optional EncryptionAlgorithm<'a> encryption_algorithm - 9: optional binary<'a> footer_signing_key_metadata + 8: optional EncryptionAlgorithm encryption_algorithm + 9: optional binary footer_signing_key_metadata } ); @@ -337,8 +339,6 @@ fn convert_column( let repetition_level_histogram = repetition_level_histogram.map(LevelHistogram::from); let definition_level_histogram = definition_level_histogram.map(LevelHistogram::from); - // FIXME: need column crypto - let result = ColumnChunkMetaData { column_descr, encodings, @@ -364,6 +364,8 @@ fn convert_column( definition_level_histogram, #[cfg(feature = "encryption")] column_crypto_metadata: column.crypto_metadata, + #[cfg(feature = "encryption")] + encrypted_column_metadata: None, }; Ok(result) } @@ -632,7 +634,7 @@ pub(crate) fn parquet_metadata_with_encryption( } let decryptor = get_file_decryptor( t_file_crypto_metadata.encryption_algorithm, - t_file_crypto_metadata.key_metadata, + t_file_crypto_metadata.key_metadata.as_ref(), file_decryption_properties, )?; let footer_decryptor = decryptor.get_footer_decryptor(); @@ -672,7 +674,7 @@ pub(crate) fn parquet_metadata_with_encryption( // File has a plaintext footer but encryption algorithm is set let file_decryptor_value = get_file_decryptor( algo, - file_meta.footer_signing_key_metadata, + file_meta.footer_signing_key_metadata.as_ref(), file_decryption_properties, )?; if file_decryption_properties.check_plaintext_footer_integrity() && !encrypted_footer { @@ -733,7 +735,7 @@ pub(crate) fn parquet_metadata_with_encryption( #[cfg(feature = "encryption")] pub(super) fn get_file_decryptor( encryption_algorithm: EncryptionAlgorithm, - footer_key_metadata: Option<&[u8]>, + footer_key_metadata: Option<&Vec>, file_decryption_properties: &FileDecryptionProperties, ) -> Result { match encryption_algorithm { @@ -750,7 +752,7 @@ pub(super) fn get_file_decryptor( FileDecryptor::new( file_decryption_properties, - footer_key_metadata, + footer_key_metadata.map(|v| v.as_slice()), aad_file_unique, aad_prefix, ) @@ -1158,6 +1160,335 @@ impl PageHeader { } } +///////////////////////////////////////////////// +// helper functions for writing file meta data + +// serialize the bits of the column chunk needed for a thrift ColumnMetaData +// struct ColumnMetaData { +// 1: required Type type +// 2: required list encodings +// 3: required list path_in_schema +// 4: required CompressionCodec codec +// 5: required i64 num_values +// 6: required i64 total_uncompressed_size +// 7: required i64 total_compressed_size +// 8: optional list key_value_metadata +// 9: required i64 data_page_offset +// 10: optional i64 index_page_offset +// 11: optional i64 dictionary_page_offset +// 12: optional Statistics statistics; +// 13: optional list encoding_stats; +// 14: optional i64 bloom_filter_offset; +// 15: optional i32 bloom_filter_length; +// 16: optional SizeStatistics size_statistics; +// 17: optional GeospatialStatistics geospatial_statistics; +// } +pub(crate) fn serialize_column_meta_data( + column_chunk: &ColumnChunkMetaData, + w: &mut ThriftCompactOutputProtocol, +) -> Result<()> { + use 
crate::file::statistics::page_stats_to_thrift; + + column_chunk.column_type().write_thrift_field(w, 1, 0)?; + column_chunk.encodings.write_thrift_field(w, 2, 1)?; + let path = column_chunk.column_descr.path().parts(); + let path: Vec<&str> = path.iter().map(|v| v.as_str()).collect(); + path.write_thrift_field(w, 3, 2)?; + column_chunk.compression.write_thrift_field(w, 4, 3)?; + column_chunk.num_values.write_thrift_field(w, 5, 4)?; + column_chunk + .total_uncompressed_size + .write_thrift_field(w, 6, 5)?; + column_chunk + .total_compressed_size + .write_thrift_field(w, 7, 6)?; + // no key_value_metadata here + let mut last_field_id = column_chunk.data_page_offset.write_thrift_field(w, 9, 7)?; + if let Some(index_page_offset) = column_chunk.index_page_offset { + last_field_id = index_page_offset.write_thrift_field(w, 10, last_field_id)?; + } + if let Some(dictionary_page_offset) = column_chunk.dictionary_page_offset { + last_field_id = dictionary_page_offset.write_thrift_field(w, 11, last_field_id)?; + } + // PageStatistics is the same as thrift Statistics, but writable + let stats = page_stats_to_thrift(column_chunk.statistics()); + if let Some(stats) = stats { + last_field_id = stats.write_thrift_field(w, 12, last_field_id)?; + } + if let Some(page_encoding_stats) = column_chunk.page_encoding_stats() { + last_field_id = page_encoding_stats.write_thrift_field(w, 13, last_field_id)?; + } + if let Some(bloom_filter_offset) = column_chunk.bloom_filter_offset { + last_field_id = bloom_filter_offset.write_thrift_field(w, 14, last_field_id)?; + } + if let Some(bloom_filter_length) = column_chunk.bloom_filter_length { + last_field_id = bloom_filter_length.write_thrift_field(w, 15, last_field_id)?; + } + + // SizeStatistics + let size_stats = if column_chunk.unencoded_byte_array_data_bytes.is_some() + || column_chunk.repetition_level_histogram.is_some() + || column_chunk.definition_level_histogram.is_some() + { + let repetition_level_histogram = column_chunk + .repetition_level_histogram() + .map(|hist| hist.clone().into_inner()); + + let definition_level_histogram = column_chunk + .definition_level_histogram() + .map(|hist| hist.clone().into_inner()); + + Some(SizeStatistics { + unencoded_byte_array_data_bytes: column_chunk.unencoded_byte_array_data_bytes, + repetition_level_histogram, + definition_level_histogram, + }) + } else { + None + }; + if let Some(size_stats) = size_stats { + size_stats.write_thrift_field(w, 16, last_field_id)?; + } + + // TODO: field 17 geo spatial stats here + w.write_struct_end() +} + +// temp struct used for writing +pub(crate) struct FileMeta<'a> { + pub(crate) file_metadata: &'a crate::file::metadata::FileMetaData, + pub(crate) row_groups: &'a Vec, + pub(crate) encryption_algorithm: Option, + pub(crate) footer_signing_key_metadata: Option>, +} + +impl<'a> WriteThrift for FileMeta<'a> { + const ELEMENT_TYPE: ElementType = ElementType::Struct; + + fn write_thrift(&self, writer: &mut ThriftCompactOutputProtocol) -> Result<()> { + self.file_metadata + .version + .write_thrift_field(writer, 1, 0)?; + + // field 2 is schema. do depth-first traversal of tree, converting to SchemaElement and + // writing along the way. 
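+        // (the Thrift schema is a flat list of SchemaElements in depth-first order; group
+        // nodes record num_children so that readers can rebuild the tree)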
+ let root = self.file_metadata.schema_descr().root_schema_ptr(); + let schema_len = num_nodes(&root); + writer.write_field_begin(FieldType::List, 2, 1)?; + writer.write_list_begin(ElementType::Struct, schema_len)?; + // recursively write Type nodes as SchemaElements + write_schema(&root, writer)?; + + self.file_metadata + .num_rows + .write_thrift_field(writer, 3, 2)?; + + // this will call RowGroupMetaData::write_thrift + let mut last_field_id = self.row_groups.write_thrift_field(writer, 4, 3)?; + + if let Some(kv_metadata) = self.file_metadata.key_value_metadata() { + last_field_id = kv_metadata.write_thrift_field(writer, 5, last_field_id)?; + } + if let Some(created_by) = self.file_metadata.created_by() { + last_field_id = created_by.write_thrift_field(writer, 6, last_field_id)?; + } + if let Some(column_orders) = self.file_metadata.column_orders() { + last_field_id = column_orders.write_thrift_field(writer, 7, last_field_id)?; + } + if let Some(algo) = self.encryption_algorithm.as_ref() { + last_field_id = algo.write_thrift_field(writer, 8, last_field_id)?; + } + if let Some(key) = self.footer_signing_key_metadata.as_ref() { + key.as_slice() + .write_thrift_field(writer, 9, last_field_id)?; + } + + writer.write_struct_end() + } +} + +fn write_schema( + node: &TypePtr, + writer: &mut ThriftCompactOutputProtocol, +) -> Result<()> { + match node.as_ref() { + crate::schema::types::Type::PrimitiveType { + basic_info, + physical_type, + type_length, + scale, + precision, + } => { + let element = SchemaElement { + type_: Some(*physical_type), + type_length: if *type_length >= 0 { + Some(*type_length) + } else { + None + }, + repetition_type: Some(basic_info.repetition()), + name: basic_info.name(), + num_children: None, + converted_type: match basic_info.converted_type() { + ConvertedType::NONE => None, + other => Some(other), + }, + scale: if *scale >= 0 { Some(*scale) } else { None }, + precision: if *precision >= 0 { + Some(*precision) + } else { + None + }, + field_id: if basic_info.has_id() { + Some(basic_info.id()) + } else { + None + }, + logical_type: basic_info.logical_type(), + }; + element.write_thrift(writer) + } + crate::schema::types::Type::GroupType { basic_info, fields } => { + let repetition = if basic_info.has_repetition() { + Some(basic_info.repetition()) + } else { + None + }; + + let element = SchemaElement { + type_: None, + type_length: None, + repetition_type: repetition, + name: basic_info.name(), + num_children: Some(fields.len().try_into()?), + converted_type: match basic_info.converted_type() { + ConvertedType::NONE => None, + other => Some(other), + }, + scale: None, + precision: None, + field_id: if basic_info.has_id() { + Some(basic_info.id()) + } else { + None + }, + logical_type: basic_info.logical_type(), + }; + + element.write_thrift(writer)?; + + // Add child elements for a group + for field in fields { + write_schema(field, writer)?; + } + Ok(()) + } + } +} + +// struct RowGroup { +// 1: required list columns +// 2: required i64 total_byte_size +// 3: required i64 num_rows +// 4: optional list sorting_columns +// 5: optional i64 file_offset +// 6: optional i64 total_compressed_size +// 7: optional i16 ordinal +// } +impl WriteThrift for RowGroupMetaData { + const ELEMENT_TYPE: ElementType = ElementType::Struct; + + fn write_thrift(&self, writer: &mut ThriftCompactOutputProtocol) -> Result<()> { + // this will call ColumnChunkMetaData::write_thrift + self.columns.write_thrift_field(writer, 1, 0)?; + self.total_byte_size.write_thrift_field(writer, 2, 1)?; + 
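+        // Each write_thrift_field call takes the absolute field id plus the id written
+        // before it: the compact protocol encodes field headers as deltas from the
+        // previous field id, and the returned id is threaded into the next call.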
let mut last_field_id = self.num_rows.write_thrift_field(writer, 3, 2)?; + if let Some(sorting_columns) = self.sorting_columns() { + last_field_id = sorting_columns.write_thrift_field(writer, 4, last_field_id)?; + } + if let Some(file_offset) = self.file_offset() { + last_field_id = file_offset.write_thrift_field(writer, 5, last_field_id)?; + } + // this is optional, but we'll always write it + last_field_id = self + .compressed_size() + .write_thrift_field(writer, 6, last_field_id)?; + if let Some(ordinal) = self.ordinal() { + ordinal.write_thrift_field(writer, 7, last_field_id)?; + } + writer.write_struct_end() + } +} + +// struct ColumnChunk { +// 1: optional string file_path +// 2: required i64 file_offset = 0 +// 3: optional ColumnMetaData meta_data +// 4: optional i64 offset_index_offset +// 5: optional i32 offset_index_length +// 6: optional i64 column_index_offset +// 7: optional i32 column_index_length +// 8: optional ColumnCryptoMetaData crypto_metadata +// 9: optional binary encrypted_column_metadata +// } +impl WriteThrift for ColumnChunkMetaData { + const ELEMENT_TYPE: ElementType = ElementType::Struct; + + #[allow(unused_assignments)] + fn write_thrift(&self, writer: &mut ThriftCompactOutputProtocol) -> Result<()> { + let mut last_field_id = 0i16; + if let Some(file_path) = self.file_path() { + last_field_id = file_path.write_thrift_field(writer, 1, last_field_id)?; + } + last_field_id = self + .file_offset() + .write_thrift_field(writer, 2, last_field_id)?; + + #[cfg(feature = "encryption")] + { + // only write the ColumnMetaData if we haven't already encrypted it + if self.encrypted_column_metadata.is_none() { + writer.write_field_begin(FieldType::Struct, 3, last_field_id)?; + serialize_column_meta_data(self, writer)?; + last_field_id = 3; + } + } + #[cfg(not(feature = "encryption"))] + { + // always write the ColumnMetaData + writer.write_field_begin(FieldType::Struct, 3, last_field_id)?; + serialize_column_meta_data(self, writer)?; + last_field_id = 3; + } + + if let Some(offset_idx_off) = self.offset_index_offset() { + last_field_id = offset_idx_off.write_thrift_field(writer, 4, last_field_id)?; + } + if let Some(offset_idx_len) = self.offset_index_length() { + last_field_id = offset_idx_len.write_thrift_field(writer, 5, last_field_id)?; + } + if let Some(column_idx_off) = self.column_index_offset() { + last_field_id = column_idx_off.write_thrift_field(writer, 6, last_field_id)?; + } + if let Some(column_idx_len) = self.column_index_length() { + last_field_id = column_idx_len.write_thrift_field(writer, 7, last_field_id)?; + } + #[cfg(feature = "encryption")] + { + if let Some(crypto_metadata) = self.crypto_metadata() { + last_field_id = crypto_metadata.write_thrift_field(writer, 8, last_field_id)?; + } + if let Some(encrypted_meta) = self.encrypted_column_metadata.as_ref() { + encrypted_meta + .as_slice() + .write_thrift_field(writer, 9, last_field_id)?; + } + } + + writer.write_struct_end() + } +} + #[cfg(test)] mod tests { use crate::file::metadata::thrift_gen::BoundingBox; diff --git a/parquet/src/file/metadata/writer.rs b/parquet/src/file/metadata/writer.rs index a09a703adef8..6396e454fb09 100644 --- a/parquet/src/file/metadata/writer.rs +++ b/parquet/src/file/metadata/writer.rs @@ -15,22 +15,26 @@ // specific language governing permissions and limitations // under the License. 
+use crate::file::metadata::thrift_gen::{EncryptionAlgorithm, FileMeta};
+use crate::file::metadata::{
+    ColumnChunkMetaData, ParquetColumnIndex, ParquetOffsetIndex, RowGroupMetaData,
+};
+use crate::schema::types::{SchemaDescPtr, SchemaDescriptor};
+use crate::{
+    basic::ColumnOrder,
+    file::metadata::{FileMetaData, ParquetMetaDataBuilder},
+};
 #[cfg(feature = "encryption")]
-use crate::encryption::{
-    encrypt::{
-        encrypt_object, encrypt_object_to_vec, write_signed_plaintext_object, FileEncryptor,
+use crate::{
+    encryption::{
+        encrypt::{encrypt_thrift_object, write_signed_plaintext_thrift_object, FileEncryptor},
+        modules::{create_footer_aad, create_module_aad, ModuleType},
     },
-    modules::{create_footer_aad, create_module_aad, ModuleType},
+    file::column_crypto_metadata::ColumnCryptoMetaData,
+    file::metadata::thrift_gen::{AesGcmV1, FileCryptoMetaData},
 };
-#[cfg(feature = "encryption")]
-use crate::errors::ParquetError;
-use crate::format::EncryptionAlgorithm;
-#[cfg(feature = "encryption")]
-use crate::format::{AesGcmV1, ColumnCryptoMetaData};
-use crate::schema::types;
-use crate::schema::types::{SchemaDescPtr, SchemaDescriptor, TypePtr};
-use crate::thrift::TSerializable;
 use crate::{errors::Result, file::page_index::column_index::ColumnIndexMetaData};
+
 use crate::{
     file::writer::{get_file_magic, TrackedWrite},
     parquet_thrift::WriteThrift,
@@ -44,18 +48,16 @@ use crate::{
 };
 use std::io::Write;
 use std::sync::Arc;
-use thrift::protocol::TCompactOutputProtocol;

 /// Writes `crate::file::metadata` structures to a thrift encoded byte stream
 ///
 /// See [`ParquetMetaDataWriter`] for background and example.
 pub(crate) struct ThriftMetadataWriter<'a, W: Write> {
     buf: &'a mut TrackedWrite<W>,
-    schema: &'a TypePtr,
     schema_descr: &'a SchemaDescPtr,
-    row_groups: Vec<crate::format::RowGroup>,
-    column_indexes: Option<&'a [Vec<Option<ColumnIndexMetaData>>]>,
-    offset_indexes: Option<&'a [Vec<Option<OffsetIndexMetaData>>]>,
+    row_groups: Vec<RowGroupMetaData>,
+    column_indexes: Option<Vec<Vec<Option<ColumnIndexMetaData>>>>,
+    offset_indexes: Option<Vec<Vec<Option<OffsetIndexMetaData>>>>,
     key_value_metadata: Option<Vec<KeyValue>>,
     created_by: Option<String>,
     object_writer: MetadataObjectWriter,
@@ -130,14 +132,17 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> {
     }

     /// Assembles and writes the final metadata to self.buf
-    pub fn finish(mut self) -> Result<crate::format::FileMetaData> {
+    pub fn finish(mut self) -> Result<ParquetMetaData> {
         let num_rows = self.row_groups.iter().map(|x| x.num_rows).sum();

+        let column_indexes = std::mem::take(&mut self.column_indexes);
+        let offset_indexes = std::mem::take(&mut self.offset_indexes);
+
         // Write column indexes and offset indexes
-        if let Some(column_indexes) = self.column_indexes {
+        if let Some(column_indexes) = column_indexes.as_ref() {
             self.write_column_indexes(column_indexes)?;
         }
-        if let Some(offset_indexes) = self.offset_indexes {
+        if let Some(offset_indexes) = offset_indexes.as_ref() {
             self.write_offset_indexes(offset_indexes)?;
         }

@@ -146,32 +151,44 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> {
         // for all leaf nodes.
         // Even if the column has an undefined sort order, such as INTERVAL, this
         // is still technically the defined TYPEORDER so it should still be set.
-        let column_orders = (0..self.schema_descr.num_columns())
-            .map(|_| crate::format::ColumnOrder::TYPEORDER(crate::format::TypeDefinedOrder {}))
+        let column_orders = self
+            .schema_descr
+            .columns()
+            .iter()
+            .map(|col| {
+                let sort_order = ColumnOrder::get_sort_order(
+                    col.logical_type(),
+                    col.converted_type(),
+                    col.physical_type(),
+                );
+                ColumnOrder::TYPE_DEFINED_ORDER(sort_order)
+            })
             .collect();
+
         // This field is optional, perhaps in cases where no min/max fields are set
         // in any Statistics or ColumnIndex object in the whole file.
         // But for simplicity we always set this field.
         let column_orders = Some(column_orders);
+
         let (row_groups, unencrypted_row_groups) = self
             .object_writer
             .apply_row_group_encryption(self.row_groups)?;
         let (encryption_algorithm, footer_signing_key_metadata) =
             self.object_writer.get_plaintext_footer_crypto_metadata();
-        let key_value_metadata = self.key_value_metadata.map(|vkv| {
-            vkv.into_iter()
-                .map(|kv| crate::format::KeyValue::new(kv.key, kv.value))
-                .collect::<Vec<_>>()
-        });
-        let mut file_metadata = crate::format::FileMetaData {
+
+        let file_metadata = FileMetaData::new(
+            self.writer_version,
             num_rows,
-            row_groups,
-            key_value_metadata,
-            version: self.writer_version,
-            schema: types::to_thrift(self.schema.as_ref())?,
-            created_by: self.created_by.clone(),
+            self.created_by,
+            self.key_value_metadata,
+            self.schema_descr.clone(),
             column_orders,
+        );
+
+        let file_meta = FileMeta {
+            file_metadata: &file_metadata,
+            row_groups: &row_groups,
             encryption_algorithm,
             footer_signing_key_metadata,
         };
@@ -179,7 +196,7 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> {
         // Write file metadata
         let start_pos = self.buf.bytes_written();
         self.object_writer
-            .write_file_metadata(&file_metadata, &mut self.buf)?;
+            .write_file_metadata(&file_meta, &mut self.buf)?;
         let end_pos = self.buf.bytes_written();

         // Write footer
@@ -188,28 +205,49 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> {
         self.buf.write_all(&metadata_len.to_le_bytes())?;
         self.buf.write_all(self.object_writer.get_file_magic())?;

-        if let Some(row_groups) = unencrypted_row_groups {
-            // If row group metadata was encrypted, we replace the encrypted row groups with
-            // unencrypted metadata before it is returned to users. This allows the metadata
-            // to be usable for retrieving the row group statistics for example, without users
-            // needing to decrypt the metadata.
-            file_metadata.row_groups = row_groups;
-        }
+        // If row group metadata was encrypted, we replace the encrypted row groups with
+        // unencrypted metadata before it is returned to users. This allows the metadata
+        // to be usable for retrieving the row group statistics for example, without users
+        // needing to decrypt the metadata.
+        let mut builder = ParquetMetaDataBuilder::new(file_metadata);
+
+        builder = match unencrypted_row_groups {
+            Some(rg) => builder.set_row_groups(rg),
+            None => builder.set_row_groups(row_groups),
+        };
+
+        let column_indexes: Option<ParquetColumnIndex> = column_indexes.map(|ovvi| {
+            ovvi.into_iter()
+                .map(|vi| {
+                    vi.into_iter()
+                        .map(|oi| oi.unwrap_or(ColumnIndexMetaData::NONE))
+                        .collect()
+                })
+                .collect()
+        });
+
+        // FIXME(ets): this will panic if there's a missing index.
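+        // (in the current writers an offset index is recorded for every column or none
+        // are passed at all, so the unwrap below is not expected to fire)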
+        let offset_indexes: Option<ParquetOffsetIndex> = offset_indexes.map(|ovvi| {
+            ovvi.into_iter()
+                .map(|vi| vi.into_iter().map(|oi| oi.unwrap()).collect())
+                .collect()
+        });

-        Ok(file_metadata)
+        builder = builder.set_column_index(column_indexes);
+        builder = builder.set_offset_index(offset_indexes);
+
+        Ok(builder.build())
     }

     pub fn new(
         buf: &'a mut TrackedWrite<W>,
-        schema: &'a TypePtr,
         schema_descr: &'a SchemaDescPtr,
-        row_groups: Vec<crate::format::RowGroup>,
+        row_groups: Vec<RowGroupMetaData>,
         created_by: Option<String>,
         writer_version: i32,
     ) -> Self {
         Self {
             buf,
-            schema,
             schema_descr,
             row_groups,
             column_indexes: None,
@@ -223,7 +261,7 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> {

     pub fn with_column_indexes(
         mut self,
-        column_indexes: &'a [Vec<Option<ColumnIndexMetaData>>],
+        column_indexes: Vec<Vec<Option<ColumnIndexMetaData>>>,
     ) -> Self {
         self.column_indexes = Some(column_indexes);
         self
@@ -231,7 +269,7 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> {

     pub fn with_offset_indexes(
         mut self,
-        offset_indexes: &'a [Vec<Option<OffsetIndexMetaData>>],
+        offset_indexes: Vec<Vec<Option<OffsetIndexMetaData>>>,
     ) -> Self {
         self.offset_indexes = Some(offset_indexes);
         self
@@ -361,12 +399,7 @@ impl<'a, W: Write> ParquetMetaDataWriter<'a, W> {
         let schema_descr = Arc::new(SchemaDescriptor::new(schema.clone()));
         let created_by = file_metadata.created_by().map(str::to_string);

-        let row_groups = self
-            .metadata
-            .row_groups()
-            .iter()
-            .map(|rg| rg.to_thrift())
-            .collect::<Vec<_>>();
+        let row_groups = self.metadata.row_groups.clone();

         let key_value_metadata = file_metadata.key_value_metadata().cloned();

@@ -375,14 +408,20 @@ impl<'a, W: Write> ParquetMetaDataWriter<'a, W> {
         let mut encoder = ThriftMetadataWriter::new(
             &mut self.buf,
-            &schema,
             &schema_descr,
             row_groups,
             created_by,
             file_metadata.version(),
         );
-        encoder = encoder.with_column_indexes(&column_indexes);
-        encoder = encoder.with_offset_indexes(&offset_indexes);
+
+        if let Some(column_indexes) = column_indexes {
+            encoder = encoder.with_column_indexes(column_indexes);
+        }
+
+        if let Some(offset_indexes) = offset_indexes {
+            encoder = encoder.with_offset_indexes(offset_indexes);
+        }
+
         if let Some(key_value_metadata) = key_value_metadata {
             encoder = encoder.with_key_value_metadata(key_value_metadata);
         }
@@ -391,46 +430,38 @@ impl<'a, W: Write> ParquetMetaDataWriter<'a, W> {
         Ok(())
     }

-    fn convert_column_indexes(&self) -> Vec<Vec<Option<ColumnIndexMetaData>>> {
-        if let Some(row_group_column_indexes) = self.metadata.column_index() {
-            (0..self.metadata.row_groups().len())
-                .map(|rg_idx| {
-                    let column_indexes = &row_group_column_indexes[rg_idx];
-                    column_indexes
-                        .iter()
-                        .map(|column_index| Some(column_index.clone()))
-                        .collect()
-                })
-                .collect()
-        } else {
-            // make a None for each row group, for each column
-            self.metadata
-                .row_groups()
-                .iter()
-                .map(|rg| std::iter::repeat_n(None, rg.columns().len()).collect())
-                .collect()
-        }
+    fn convert_column_indexes(&self) -> Option<Vec<Vec<Option<ColumnIndexMetaData>>>> {
+        // FIXME(ets): we're converting from ParquetColumnIndex to Vec<Vec<Option<ColumnIndexMetaData>>>,
+        // but then converting back to ParquetColumnIndex in the end. need to unify this.
+        self.metadata
+            .column_index()
+            .map(|row_group_column_indexes| {
+                (0..self.metadata.row_groups().len())
+                    .map(|rg_idx| {
+                        let column_indexes = &row_group_column_indexes[rg_idx];
+                        column_indexes
+                            .iter()
+                            .map(|column_index| Some(column_index.clone()))
+                            .collect()
+                    })
+                    .collect()
+            })
     }

-    fn convert_offset_index(&self) -> Vec<Vec<Option<OffsetIndexMetaData>>> {
-        if let Some(row_group_offset_indexes) = self.metadata.offset_index() {
-            (0..self.metadata.row_groups().len())
-                .map(|rg_idx| {
-                    let offset_indexes = &row_group_offset_indexes[rg_idx];
-                    offset_indexes
-                        .iter()
-                        .map(|offset_index| Some(offset_index.clone()))
-                        .collect()
-                })
-                .collect()
-        } else {
-            // make a None for each row group, for each column
-            self.metadata
-                .row_groups()
-                .iter()
-                .map(|rg| std::iter::repeat_n(None, rg.columns().len()).collect())
-                .collect()
-        }
+    fn convert_offset_index(&self) -> Option<Vec<Vec<Option<OffsetIndexMetaData>>>> {
+        self.metadata
+            .offset_index()
+            .map(|row_group_offset_indexes| {
+                (0..self.metadata.row_groups().len())
+                    .map(|rg_idx| {
+                        let offset_indexes = &row_group_offset_indexes[rg_idx];
+                        offset_indexes
+                            .iter()
+                            .map(|offset_index| Some(offset_index.clone()))
+                            .collect()
+                    })
+                    .collect()
+            })
     }
 }

@@ -441,13 +472,6 @@ struct MetadataObjectWriter {
 }

 impl MetadataObjectWriter {
-    #[inline]
-    fn write_object(object: &impl TSerializable, sink: impl Write) -> Result<()> {
-        let mut protocol = TCompactOutputProtocol::new(sink);
-        object.write_to_out_protocol(&mut protocol)?;
-        Ok(())
-    }
-
     #[inline]
     fn write_thrift_object(object: &impl WriteThrift, sink: impl Write) -> Result<()> {
         let mut protocol = ThriftCompactOutputProtocol::new(sink);
@@ -460,19 +484,15 @@ impl MetadataObjectWriter {

 #[cfg(not(feature = "encryption"))]
 impl MetadataObjectWriter {
     /// Write [`FileMetaData`] in Thrift format
-    fn write_file_metadata(
-        &self,
-        file_metadata: &crate::format::FileMetaData,
-        sink: impl Write,
-    ) -> Result<()> {
-        Self::write_object(file_metadata, sink)
+    fn write_file_metadata(&self, file_metadata: &FileMeta, sink: impl Write) -> Result<()> {
+        Self::write_thrift_object(file_metadata, sink)
     }

     /// Write a column [`OffsetIndex`] in Thrift format
     fn write_offset_index(
         &self,
         offset_index: &OffsetIndexMetaData,
-        _column_chunk: &crate::format::ColumnChunk,
+        _column_chunk: &ColumnChunkMetaData,
         _row_group_idx: usize,
         _column_idx: usize,
         sink: impl Write,
@@ -484,7 +504,7 @@ impl MetadataObjectWriter {
     fn write_column_index(
         &self,
         column_index: &ColumnIndexMetaData,
-        _column_chunk: &crate::format::ColumnChunk,
+        _column_chunk: &ColumnChunkMetaData,
         _row_group_idx: usize,
         _column_idx: usize,
         sink: impl Write,
@@ -495,11 +515,8 @@ impl MetadataObjectWriter {
     /// No-op implementation of row-group metadata encryption
     fn apply_row_group_encryption(
         &self,
-        row_groups: Vec<crate::format::RowGroup>,
-    ) -> Result<(
-        Vec<crate::format::RowGroup>,
-        Option<Vec<crate::format::RowGroup>>,
-    )> {
+        row_groups: Vec<RowGroupMetaData>,
+    ) -> Result<(Vec<RowGroupMetaData>, Option<Vec<RowGroupMetaData>>)> {
         Ok((row_groups, None))
     }

@@ -527,29 +544,25 @@ impl MetadataObjectWriter {
     /// Write [`FileMetaData`] in Thrift format, possibly encrypting it if required
     ///
     /// [`FileMetaData`]: crate::format::FileMetaData
-    fn write_file_metadata(
-        &self,
-        file_metadata: &crate::format::FileMetaData,
-        mut sink: impl Write,
-    ) -> Result<()> {
+    fn write_file_metadata(&self, file_metadata: &FileMeta, mut sink: impl Write) -> Result<()> {
         match self.file_encryptor.as_ref() {
             Some(file_encryptor) if file_encryptor.properties().encrypt_footer() => {
                 // First write FileCryptoMetadata
                 let crypto_metadata = Self::file_crypto_metadata(file_encryptor)?;
-                let mut protocol =
TCompactOutputProtocol::new(&mut sink); - crypto_metadata.write_to_out_protocol(&mut protocol)?; + let mut protocol = ThriftCompactOutputProtocol::new(&mut sink); + crypto_metadata.write_thrift(&mut protocol)?; // Then write encrypted footer let aad = create_footer_aad(file_encryptor.file_aad())?; let mut encryptor = file_encryptor.get_footer_encryptor()?; - encrypt_object(file_metadata, &mut encryptor, &mut sink, &aad) + encrypt_thrift_object(file_metadata, &mut encryptor, &mut sink, &aad) } Some(file_encryptor) if file_metadata.encryption_algorithm.is_some() => { let aad = create_footer_aad(file_encryptor.file_aad())?; let mut encryptor = file_encryptor.get_footer_encryptor()?; - write_signed_plaintext_object(file_metadata, &mut encryptor, &mut sink, &aad) + write_signed_plaintext_thrift_object(file_metadata, &mut encryptor, &mut sink, &aad) } - _ => Self::write_object(file_metadata, &mut sink), + _ => Self::write_thrift_object(file_metadata, &mut sink), } } @@ -559,7 +572,7 @@ impl MetadataObjectWriter { fn write_offset_index( &self, offset_index: &OffsetIndexMetaData, - column_chunk: &crate::format::ColumnChunk, + column_chunk: &ColumnChunkMetaData, row_group_idx: usize, column_idx: usize, sink: impl Write, @@ -584,7 +597,7 @@ impl MetadataObjectWriter { fn write_column_index( &self, column_index: &ColumnIndexMetaData, - column_chunk: &crate::format::ColumnChunk, + column_chunk: &ColumnChunkMetaData, row_group_idx: usize, column_idx: usize, sink: impl Write, @@ -608,11 +621,8 @@ impl MetadataObjectWriter { /// and possibly unencrypted metadata to be returned to clients if data was encrypted. fn apply_row_group_encryption( &self, - row_groups: Vec, - ) -> Result<( - Vec, - Option>, - )> { + row_groups: Vec, + ) -> Result<(Vec, Option>)> { match &self.file_encryptor { Some(file_encryptor) => { let unencrypted_row_groups = row_groups.clone(); @@ -636,21 +646,12 @@ impl MetadataObjectWriter { object: &impl WriteThrift, mut sink: impl Write, file_encryptor: &FileEncryptor, - column_metadata: &crate::format::ColumnChunk, + column_metadata: &ColumnChunkMetaData, module_type: ModuleType, row_group_index: usize, column_index: usize, ) -> Result<()> { - let column_path_vec = &column_metadata - .meta_data - .as_ref() - .ok_or_else(|| { - general_err!( - "Column metadata not set for column {} when encrypting object", - column_index - ) - })? 
- .path_in_schema; + let column_path_vec = column_metadata.column_path().as_ref(); let joined_column_path; let column_path = if column_path_vec.len() == 1 { @@ -699,36 +700,34 @@ impl MetadataObjectWriter { .aad_prefix() .map(|_| !file_encryptor.properties().store_aad_prefix()); let aad_prefix = if file_encryptor.properties().store_aad_prefix() { - file_encryptor.properties().aad_prefix().cloned() + file_encryptor.properties().aad_prefix() } else { None }; - EncryptionAlgorithm::AESGCMV1(AesGcmV1 { - aad_prefix, + EncryptionAlgorithm::AES_GCM_V1(AesGcmV1 { + aad_prefix: aad_prefix.cloned(), aad_file_unique: Some(file_encryptor.aad_file_unique().clone()), supply_aad_prefix, }) } - fn file_crypto_metadata( - file_encryptor: &FileEncryptor, - ) -> Result { + fn file_crypto_metadata(file_encryptor: &FileEncryptor) -> Result { let properties = file_encryptor.properties(); - Ok(crate::format::FileCryptoMetaData { + Ok(FileCryptoMetaData { encryption_algorithm: Self::encryption_algorithm_from_encryptor(file_encryptor), key_metadata: properties.footer_key_metadata().cloned(), }) } fn encrypt_row_groups( - row_groups: Vec, + row_groups: Vec, file_encryptor: &Arc, - ) -> Result> { + ) -> Result> { row_groups .into_iter() .enumerate() .map(|(rg_idx, mut rg)| { - let cols: Result> = rg + let cols: Result> = rg .columns .into_iter() .enumerate() @@ -744,26 +743,24 @@ impl MetadataObjectWriter { /// Apply column encryption to column chunk metadata fn encrypt_column_chunk( - mut column_chunk: crate::format::ColumnChunk, + mut column_chunk: ColumnChunkMetaData, file_encryptor: &Arc, row_group_index: usize, column_index: usize, - ) -> Result { + ) -> Result { // Column crypto metadata should have already been set when the column was created. // Here we apply the encryption by encrypting the column metadata if required. - match column_chunk.crypto_metadata.as_ref() { + match column_chunk.column_crypto_metadata.as_ref() { None => {} - Some(ColumnCryptoMetaData::ENCRYPTIONWITHFOOTERKEY(_)) => { + Some(ColumnCryptoMetaData::ENCRYPTION_WITH_FOOTER_KEY) => { // When uniform encryption is used the footer is already encrypted, // so the column chunk does not need additional encryption. 
} - Some(ColumnCryptoMetaData::ENCRYPTIONWITHCOLUMNKEY(col_key)) => { + Some(ColumnCryptoMetaData::ENCRYPTION_WITH_COLUMN_KEY(col_key)) => { + use crate::file::metadata::thrift_gen::serialize_column_meta_data; + let column_path = col_key.path_in_schema.join("."); let mut column_encryptor = file_encryptor.get_column_encryptor(&column_path)?; - let meta_data = column_chunk - .meta_data - .take() - .ok_or_else(|| general_err!("Column metadata not set for encryption"))?; let aad = create_module_aad( file_encryptor.file_aad(), ModuleType::ColumnMetaData, @@ -771,10 +768,15 @@ impl MetadataObjectWriter { column_index, None, )?; - let ciphertext = encrypt_object_to_vec(&meta_data, &mut column_encryptor, &aad)?; + // create temp ColumnMetaData that we can encrypt + let mut buffer: Vec = vec![]; + { + let mut prot = ThriftCompactOutputProtocol::new(&mut buffer); + serialize_column_meta_data(&column_chunk, &mut prot)?; + } + let ciphertext = column_encryptor.encrypt(&buffer, &aad)?; column_chunk.encrypted_column_metadata = Some(ciphertext); - debug_assert!(column_chunk.meta_data.is_none()); } } diff --git a/parquet/src/file/serialized_reader.rs b/parquet/src/file/serialized_reader.rs index 1442f0f67ca0..b0d64ea76017 100644 --- a/parquet/src/file/serialized_reader.rs +++ b/parquet/src/file/serialized_reader.rs @@ -2546,8 +2546,8 @@ mod tests { } let file_metadata = file_writer.close().unwrap(); - assert_eq!(file_metadata.num_rows, 25); - assert_eq!(file_metadata.row_groups.len(), 5); + assert_eq!(file_metadata.file_metadata().num_rows(), 25); + assert_eq!(file_metadata.num_row_groups(), 5); // read only the 3rd row group let read_options = ReadOptionsBuilder::new() diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs index d6f742c1377c..1ce7ad29123c 100644 --- a/parquet/src/file/writer.rs +++ b/parquet/src/file/writer.rs @@ -155,7 +155,6 @@ pub type OnCloseRowGroup<'a, W> = Box< /// - After all row groups have been written, close the file writer using `close` method. pub struct SerializedFileWriter { buf: TrackedWrite, - schema: TypePtr, descr: SchemaDescPtr, props: WriterPropertiesPtr, row_groups: Vec, @@ -195,7 +194,6 @@ impl SerializedFileWriter { Self::start_file(&properties, &mut buf)?; Ok(Self { buf, - schema, descr: Arc::new(schema_descriptor), props: properties, row_groups: vec![], @@ -298,7 +296,7 @@ impl SerializedFileWriter { /// Unlike [`Self::close`] this does not consume self /// /// Attempting to write after calling finish will result in an error - pub fn finish(&mut self) -> Result { + pub fn finish(&mut self) -> Result { self.assert_previous_writer_closed()?; let metadata = self.write_metadata()?; self.buf.flush()?; @@ -306,7 +304,7 @@ impl SerializedFileWriter { } /// Closes and finalises file writer, returning the file metadata. - pub fn close(mut self) -> Result { + pub fn close(mut self) -> Result { self.finish() } @@ -326,8 +324,9 @@ impl SerializedFileWriter { Ok(()) } - /// Assembles and writes metadata at the end of the file. - fn write_metadata(&mut self) -> Result { + /// Assembles and writes metadata at the end of the file. This will take ownership + /// of `row_groups` and the page index structures. 
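+    /// Returns the assembled [`ParquetMetaData`] for the finished file.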
+ fn write_metadata(&mut self) -> Result { self.finished = true; // write out any remaining bloom filters after all row groups @@ -341,15 +340,13 @@ impl SerializedFileWriter { None => Some(self.kv_metadatas.clone()), }; - let row_groups = self - .row_groups - .iter() - .map(|v| v.to_thrift()) - .collect::>(); + // take ownership of metadata + let row_groups = std::mem::take(&mut self.row_groups); + let column_indexes = std::mem::take(&mut self.column_indexes); + let offset_indexes = std::mem::take(&mut self.offset_indexes); let mut encoder = ThriftMetadataWriter::new( &mut self.buf, - &self.schema, &self.descr, row_groups, Some(self.props.created_by().to_string()), @@ -365,8 +362,10 @@ impl SerializedFileWriter { encoder = encoder.with_key_value_metadata(key_value_metadata) } - encoder = encoder.with_column_indexes(&self.column_indexes); - encoder = encoder.with_offset_indexes(&self.offset_indexes); + encoder = encoder.with_column_indexes(column_indexes); + if !self.props.offset_index_disabled() { + encoder = encoder.with_offset_indexes(offset_indexes); + } encoder.finish() } @@ -1629,7 +1628,7 @@ mod tests { file: W, data: Vec>, compression: Compression, - ) -> crate::format::FileMetaData + ) -> ParquetMetaData where W: Write + Send, R: ChunkReader + From + 'static, @@ -1644,7 +1643,7 @@ mod tests { data: Vec>, value: F, compression: Compression, - ) -> crate::format::FileMetaData + ) -> ParquetMetaData where W: Write + Send, R: ChunkReader + From + 'static, @@ -1715,7 +1714,7 @@ mod tests { /// File write-read roundtrip. /// `data` consists of arrays of values for each row group. - fn test_file_roundtrip(file: File, data: Vec>) -> crate::format::FileMetaData { + fn test_file_roundtrip(file: File, data: Vec>) -> ParquetMetaData { test_roundtrip_i32::(file, data, Compression::UNCOMPRESSED) } @@ -1790,13 +1789,12 @@ mod tests { fn test_column_offset_index_file() { let file = tempfile::tempfile().unwrap(); let file_metadata = test_file_roundtrip(file, vec![vec![1, 2, 3, 4, 5]]); - file_metadata.row_groups.iter().for_each(|row_group| { - row_group.columns.iter().for_each(|column_chunk| { - assert_ne!(None, column_chunk.column_index_offset); - assert_ne!(None, column_chunk.column_index_length); - - assert_ne!(None, column_chunk.offset_index_offset); - assert_ne!(None, column_chunk.offset_index_length); + file_metadata.row_groups().iter().for_each(|row_group| { + row_group.columns().iter().for_each(|column_chunk| { + assert!(column_chunk.column_index_offset().is_some()); + assert!(column_chunk.column_index_length().is_some()); + assert!(column_chunk.offset_index_offset().is_some()); + assert!(column_chunk.offset_index_length().is_some()); }) }); } @@ -2037,15 +2035,15 @@ mod tests { row_group_writer.close().unwrap(); let metadata = file_writer.finish().unwrap(); - assert_eq!(metadata.row_groups.len(), 1); - let row_group = &metadata.row_groups[0]; - assert_eq!(row_group.columns.len(), 2); + assert_eq!(metadata.num_row_groups(), 1); + let row_group = metadata.row_group(0); + assert_eq!(row_group.num_columns(), 2); // Column "a" has both offset and column index, as requested - assert!(row_group.columns[0].offset_index_offset.is_some()); - assert!(row_group.columns[0].column_index_offset.is_some()); + assert!(row_group.column(0).offset_index_offset().is_some()); + assert!(row_group.column(0).column_index_offset().is_some()); // Column "b" should only have offset index - assert!(row_group.columns[1].offset_index_offset.is_some()); - assert!(row_group.columns[1].column_index_offset.is_none()); + 
assert!(row_group.column(1).offset_index_offset().is_some()); + assert!(row_group.column(1).column_index_offset().is_none()); let err = file_writer.next_row_group().err().unwrap().to_string(); assert_eq!(err, "Parquet error: SerializedFileWriter already finished"); @@ -2099,9 +2097,8 @@ mod tests { row_group_writer.close().unwrap(); let file_metadata = writer.close().unwrap(); - assert_eq!(file_metadata.row_groups.len(), 1); - assert_eq!(file_metadata.row_groups[0].columns.len(), 1); - assert!(file_metadata.row_groups[0].columns[0].meta_data.is_some()); + assert_eq!(file_metadata.num_row_groups(), 1); + assert_eq!(file_metadata.row_group(0).num_columns(), 1); let check_def_hist = |def_hist: &[i64]| { assert_eq!(def_hist.len(), 2); @@ -2109,29 +2106,26 @@ mod tests { assert_eq!(def_hist[1], 7); }; - assert!(file_metadata.row_groups[0].columns[0].meta_data.is_some()); - let meta_data = file_metadata.row_groups[0].columns[0] - .meta_data - .as_ref() - .unwrap(); - assert!(meta_data.size_statistics.is_some()); - let size_stats = meta_data.size_statistics.as_ref().unwrap(); + let meta_data = file_metadata.row_group(0).column(0); - assert!(size_stats.repetition_level_histogram.is_none()); - assert!(size_stats.definition_level_histogram.is_some()); - assert!(size_stats.unencoded_byte_array_data_bytes.is_some()); + assert!(meta_data.repetition_level_histogram().is_none()); + assert!(meta_data.definition_level_histogram().is_some()); + assert!(meta_data.unencoded_byte_array_data_bytes().is_some()); assert_eq!( unenc_size, - size_stats.unencoded_byte_array_data_bytes.unwrap() + meta_data.unencoded_byte_array_data_bytes().unwrap() ); - check_def_hist(size_stats.definition_level_histogram.as_ref().unwrap()); + check_def_hist(meta_data.definition_level_histogram().unwrap().values()); // check that the read metadata is also correct let options = ReadOptionsBuilder::new().with_page_index().build(); let reader = SerializedFileReader::new_with_options(file, options).unwrap(); let rfile_metadata = reader.metadata().file_metadata(); - assert_eq!(rfile_metadata.num_rows(), file_metadata.num_rows); + assert_eq!( + rfile_metadata.num_rows(), + file_metadata.file_metadata().num_rows() + ); assert_eq!(reader.num_row_groups(), 1); let rowgroup = reader.get_row_group(0).unwrap(); assert_eq!(rowgroup.num_columns(), 1); @@ -2251,9 +2245,8 @@ mod tests { row_group_writer.close().unwrap(); let file_metadata = writer.close().unwrap(); - assert_eq!(file_metadata.row_groups.len(), 1); - assert_eq!(file_metadata.row_groups[0].columns.len(), 1); - assert!(file_metadata.row_groups[0].columns[0].meta_data.is_some()); + assert_eq!(file_metadata.num_row_groups(), 1); + assert_eq!(file_metadata.row_group(0).num_columns(), 1); let check_def_hist = |def_hist: &[i64]| { assert_eq!(def_hist.len(), 4); @@ -2271,25 +2264,22 @@ mod tests { // check that histograms are set properly in the write and read metadata // also check that unencoded_byte_array_data_bytes is not set - assert!(file_metadata.row_groups[0].columns[0].meta_data.is_some()); - let meta_data = file_metadata.row_groups[0].columns[0] - .meta_data - .as_ref() - .unwrap(); - assert!(meta_data.size_statistics.is_some()); - let size_stats = meta_data.size_statistics.as_ref().unwrap(); - assert!(size_stats.repetition_level_histogram.is_some()); - assert!(size_stats.definition_level_histogram.is_some()); - assert!(size_stats.unencoded_byte_array_data_bytes.is_none()); - check_def_hist(size_stats.definition_level_histogram.as_ref().unwrap()); - 
check_rep_hist(size_stats.repetition_level_histogram.as_ref().unwrap()); + let meta_data = file_metadata.row_group(0).column(0); + assert!(meta_data.repetition_level_histogram().is_some()); + assert!(meta_data.definition_level_histogram().is_some()); + assert!(meta_data.unencoded_byte_array_data_bytes().is_none()); + check_def_hist(meta_data.definition_level_histogram().unwrap().values()); + check_rep_hist(meta_data.repetition_level_histogram().unwrap().values()); // check that the read metadata is also correct let options = ReadOptionsBuilder::new().with_page_index().build(); let reader = SerializedFileReader::new_with_options(file, options).unwrap(); let rfile_metadata = reader.metadata().file_metadata(); - assert_eq!(rfile_metadata.num_rows(), file_metadata.num_rows); + assert_eq!( + rfile_metadata.num_rows(), + file_metadata.file_metadata().num_rows() + ); assert_eq!(reader.num_row_groups(), 1); let rowgroup = reader.get_row_group(0).unwrap(); assert_eq!(rowgroup.num_columns(), 1); diff --git a/parquet/src/schema/types.rs b/parquet/src/schema/types.rs index 1406295c3a4f..dca0f8417957 100644 --- a/parquet/src/schema/types.rs +++ b/parquet/src/schema/types.rs @@ -1114,6 +1114,24 @@ impl SchemaDescriptor { } } +// walk tree and count nodes +pub(crate) fn num_nodes(tp: &TypePtr) -> usize { + let mut n_nodes = 1usize; // count root + for f in tp.get_fields().iter() { + count_nodes(f, &mut n_nodes); + } + n_nodes +} + +pub(crate) fn count_nodes(tp: &TypePtr, n_nodes: &mut usize) { + *n_nodes += 1; + if let Type::GroupType { ref fields, .. } = tp.as_ref() { + for f in fields { + count_nodes(f, n_nodes); + } + } +} + // do a quick walk of the tree to get proper sizing for SchemaDescriptor arrays fn num_leaves(tp: &TypePtr) -> usize { let mut n_leaves = 0usize; diff --git a/parquet/tests/encryption/encryption.rs b/parquet/tests/encryption/encryption.rs index 96dd8654cd76..0261c22c2c2d 100644 --- a/parquet/tests/encryption/encryption.rs +++ b/parquet/tests/encryption/encryption.rs @@ -982,23 +982,17 @@ pub fn test_retrieve_row_group_statistics_after_encrypted_write() { } let file_metadata = writer.close().unwrap(); - assert_eq!(file_metadata.row_groups.len(), 1); - let row_group = &file_metadata.row_groups[0]; - assert_eq!(row_group.columns.len(), 1); - let column = &row_group.columns[0]; - let column_stats = column - .meta_data - .as_ref() - .unwrap() - .statistics - .as_ref() - .unwrap(); + assert_eq!(file_metadata.num_row_groups(), 1); + let row_group = file_metadata.row_group(0); + assert_eq!(row_group.num_columns(), 1); + let column = row_group.column(0); + let column_stats = column.statistics().unwrap(); assert_eq!( - column_stats.min_value.as_deref(), + column_stats.min_bytes_opt(), Some(3i32.to_le_bytes().as_slice()) ); assert_eq!( - column_stats.max_value.as_deref(), + column_stats.max_bytes_opt(), Some(19i32.to_le_bytes().as_slice()) ); } diff --git a/parquet/tests/encryption/encryption_async.rs b/parquet/tests/encryption/encryption_async.rs index 9c1e0c00a3f6..6999b1a931f4 100644 --- a/parquet/tests/encryption/encryption_async.rs +++ b/parquet/tests/encryption/encryption_async.rs @@ -34,9 +34,9 @@ use parquet::arrow::{ArrowWriter, AsyncArrowWriter}; use parquet::encryption::decrypt::FileDecryptionProperties; use parquet::encryption::encrypt::FileEncryptionProperties; use parquet::errors::ParquetError; +use parquet::file::metadata::ParquetMetaData; use parquet::file::properties::{WriterProperties, WriterPropertiesBuilder}; use parquet::file::writer::SerializedFileWriter; -use 
parquet::format::FileMetaData; use std::io::Write; use std::sync::Arc; use tokio::fs::File; @@ -647,7 +647,7 @@ fn spawn_column_parallel_row_group_writer( async fn concatenate_parallel_row_groups( mut parquet_writer: SerializedFileWriter, mut serialize_rx: Receiver>, -) -> Result { +) -> Result { while let Some(task) = serialize_rx.recv().await { let result = task.await; let mut rg_out = parquet_writer.next_row_group()?; @@ -818,8 +818,7 @@ async fn test_multi_threaded_encrypted_writing() { let metadata = serialized_file_writer.close().unwrap(); // Close the file writer which writes the footer - assert_eq!(metadata.num_rows, 50); - assert_eq!(metadata.schema, metadata.schema); + assert_eq!(metadata.file_metadata().num_rows(), 50); // Check that the file was written correctly let (read_record_batches, read_metadata) = @@ -909,8 +908,7 @@ async fn test_multi_threaded_encrypted_writing_deprecated() { // Close the file writer which writes the footer let metadata = writer.finish().unwrap(); - assert_eq!(metadata.num_rows, 100); - assert_eq!(metadata.schema, metadata.schema); + assert_eq!(metadata.file_metadata().num_rows(), 100); // Check that the file was written correctly let (read_record_batches, read_metadata) = From 10ea3b2f0121747dd7bc362b833b55310d9314a2 Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Wed, 1 Oct 2025 09:48:48 -0700 Subject: [PATCH 14/15] [thrift-remodel] Remove most usage of `parquet::format` structures (#8476) # Which issue does this PR close? **Note: this targets a feature branch, not main** - Part of #5854. # Rationale for this change This should complete the major work of the remodel. Follow-on tasks include performance tweaks, documentation, and adding the ability to skip unneeded structures when decoding. None of the latter should involve breaking changes. # What changes are included in this PR? The major change is removing all of the code to convert to `parquet::format` structures. The only places those are still used are in the benchmark and `bin` code which are not strictly in the parquet crate. Once those are cleaned up we can deprecate the `format` module. This also adds a markdown file documenting the use of the new Thrift macros. # Are these changes tested? Should be covered by existing tests. # Are there any user-facing changes? 
Yes --------- Co-authored-by: Matthijs Brobbel --- parquet/THRIFT.md | 447 +++++++++++ parquet/src/arrow/schema/extension.rs | 4 +- parquet/src/basic.rs | 800 +------------------- parquet/src/bin/parquet-layout.rs | 32 +- parquet/src/bloom_filter/mod.rs | 62 +- parquet/src/column/page.rs | 4 +- parquet/src/column/page_encryption.rs | 4 +- parquet/src/column/writer/mod.rs | 3 +- parquet/src/file/column_crypto_metadata.rs | 55 -- parquet/src/file/metadata/memory.rs | 3 +- parquet/src/file/metadata/mod.rs | 378 ++------- parquet/src/file/metadata/parser.rs | 199 +++-- parquet/src/file/metadata/thrift_gen.rs | 178 ++++- parquet/src/file/metadata/writer.rs | 10 +- parquet/src/file/mod.rs | 1 - parquet/src/file/page_encoding_stats.rs | 82 -- parquet/src/file/page_index/column_index.rs | 9 +- parquet/src/file/page_index/index_reader.rs | 4 +- parquet/src/file/page_index/offset_index.rs | 35 +- parquet/src/file/serialized_reader.rs | 6 +- parquet/src/file/statistics.rs | 238 +----- parquet/src/file/writer.rs | 35 +- parquet/src/geospatial/bounding_box.rs | 17 - parquet/src/geospatial/statistics.rs | 36 +- parquet/src/parquet_macros.rs | 194 ++--- parquet/src/parquet_thrift.rs | 38 +- parquet/src/schema/types.rs | 298 +------- parquet/src/thrift.rs | 348 +-------- parquet/tests/arrow_reader/io/mod.rs | 10 +- 29 files changed, 1027 insertions(+), 2503 deletions(-) create mode 100644 parquet/THRIFT.md delete mode 100644 parquet/src/file/page_encoding_stats.rs diff --git a/parquet/THRIFT.md b/parquet/THRIFT.md new file mode 100644 index 000000000000..06e97709cce3 --- /dev/null +++ b/parquet/THRIFT.md @@ -0,0 +1,447 @@ + + +# Thrift serialization in the parquet crate + +For both performance and flexibility reasons, this crate uses custom Thrift parsers and +serialization mechanisms. For many of the objects defined by the Parquet specification macros +are used to generate the objects as well as the code to serialize them. But in certain instances +(performance bottlenecks, additions to the spec, etc.), it becomes necessary to implement the +serialization code manually. This document serves to document both the standard usage of the +Thrift macros, as well as how to implement custom encoders and decoders. + +## Thrift macros + +The Parquet specification utilizes Thrift enums, unions, and structs, defined by an Interface +Description Language (IDL). This IDL is usually parsed by a Thrift code generator to produce +language specific structures and serialization/deserialization code. This crate, however, uses +Rust macros to perform the same function. In addition to skipping creation of additional duplicate +structures, doing so allows for customizations that produce more performant code, as well as the +ability to pick and choose which fields to process. + +### Enums + +Thrift enums are the simplest structure, and are logically identical to Rust enums with unit +variants. The IDL description will look like + +``` +enum Type { + BOOLEAN = 0; + INT32 = 1; + INT64 = 2; + INT96 = 3; + FLOAT = 4; + DOUBLE = 5; + BYTE_ARRAY = 6; + FIXED_LEN_BYTE_ARRAY = 7; +} +``` + +The `thrift_enum` macro can be used in this instance. 
+
+```rust
+thrift_enum!(
+enum Type {
+  BOOLEAN = 0;
+  INT32 = 1;
+  INT64 = 2;
+  INT96 = 3;
+  FLOAT = 4;
+  DOUBLE = 5;
+  BYTE_ARRAY = 6;
+  FIXED_LEN_BYTE_ARRAY = 7;
+}
+);
+```
+
+which will produce a public Rust enum
+
+```rust
+pub enum Type {
+    BOOLEAN,
+    INT32,
+    INT64,
+    INT96,
+    FLOAT,
+    DOUBLE,
+    BYTE_ARRAY,
+    FIXED_LEN_BYTE_ARRAY,
+}
+```
+
+### Unions
+
+Thrift unions are a special kind of struct in which only a single field is populated. In this
+regard they are much like Rust enums, which can have a mix of unit and tuple variants. Because of
+this flexibility, specifying unions is a little trickier.
+
+Oftentimes a union will be defined for which all the variants are typed with empty structs. For
+example, the `TimeUnit` union used for `LogicalType`s:
+
+```
+struct MilliSeconds {}
+struct MicroSeconds {}
+struct NanoSeconds {}
+union TimeUnit {
+  1: MilliSeconds MILLIS
+  2: MicroSeconds MICROS
+  3: NanoSeconds NANOS
+}
+```
+
+When serialized, these empty structs become a single `0` (to mark the end of the struct). As an
+optimization, and to allow for a simpler interface, the `thrift_union_all_empty` macro can be used.
+
+```rust
+thrift_union_all_empty!(
+union TimeUnit {
+  1: MilliSeconds MILLIS
+  2: MicroSeconds MICROS
+  3: NanoSeconds NANOS
+}
+);
+```
+
+This macro will ignore the types specified for each variant, and will produce the following Rust
+`enum`:
+
+```rust
+pub enum TimeUnit {
+    MILLIS,
+    MICROS,
+    NANOS,
+}
+```
+
+For unions with mixed variant types, some modifications to the IDL are necessary. Take the
+definition of `ColumnCryptoMetaData`:
+
+```
+struct EncryptionWithFooterKey {
+}
+
+struct EncryptionWithColumnKey {
+  /** Column path in schema **/
+  1: required list path_in_schema
+
+  /** Retrieval metadata of column encryption key **/
+  2: optional binary key_metadata
+}
+
+union ColumnCryptoMetaData {
+  1: EncryptionWithFooterKey ENCRYPTION_WITH_FOOTER_KEY
+  2: EncryptionWithColumnKey ENCRYPTION_WITH_COLUMN_KEY
+}
+```
+
+The `ENCRYPTION_WITH_FOOTER_KEY` variant is typed with an empty struct, while
+`ENCRYPTION_WITH_COLUMN_KEY` has the type of a struct with fields. In this case, the `thrift_union`
+macro is used.
+
+```rust
+thrift_union!(
+union ColumnCryptoMetaData {
+  1: ENCRYPTION_WITH_FOOTER_KEY
+  2: (EncryptionWithColumnKey) ENCRYPTION_WITH_COLUMN_KEY
+}
+);
+```
+
+Here, the type has been omitted for `ENCRYPTION_WITH_FOOTER_KEY` to indicate it should be a unit
+variant, while the type for `ENCRYPTION_WITH_COLUMN_KEY` is enclosed in parens. The parens are
+necessary to provide a semantic clue to the macro that the identifier is a type. The above will
+produce the Rust enum
+
+```rust
+pub enum ColumnCryptoMetaData {
+    ENCRYPTION_WITH_FOOTER_KEY,
+    ENCRYPTION_WITH_COLUMN_KEY(EncryptionWithColumnKey),
+}
+```
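+
+Because these are ordinary Rust enums, the generated types are consumed with plain pattern
+matching. As a brief usage sketch (assuming, per the IDL above, that `key_metadata` is generated
+as an `Option<Vec<u8>>` field on `EncryptionWithColumnKey`):
+
+```rust
+fn column_key_metadata(crypto: &ColumnCryptoMetaData) -> Option<&[u8]> {
+    match crypto {
+        // unit variant generated from the empty struct
+        ColumnCryptoMetaData::ENCRYPTION_WITH_FOOTER_KEY => None,
+        // tuple variant carrying the generated EncryptionWithColumnKey struct
+        ColumnCryptoMetaData::ENCRYPTION_WITH_COLUMN_KEY(k) => k.key_metadata.as_deref(),
+    }
+}
+```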
+
+### Structs
+
+The `thrift_struct` macro is used for structs. This macro is a little more flexible than the
+others: it allows the visibility to be specified, and it also allows lifetimes to be specified
+for the defined structs as well as for their fields. An example of this is the `SchemaElement`
+struct, which is defined in this crate as
+
+```rust
+thrift_struct!(
+pub(crate) struct SchemaElement<'a> {
+  1: optional Type r#type;
+  2: optional i32 type_length;
+  3: optional Repetition repetition_type;
+  4: required string<'a> name;
+  5: optional i32 num_children;
+  6: optional ConvertedType converted_type;
+  7: optional i32 scale
+  8: optional i32 precision
+  9: optional i32 field_id;
+  10: optional LogicalType logical_type
+}
+);
+```
+
+Here the `string` field `name` is given a lifetime annotation, which is then propagated to the
+struct definition. Without this annotation, the resultant field would be a `String` type, rather
+than a string slice. The visibility of this struct (and all fields) will be `pub(crate)`. The
+resultant Rust struct will be
+
+```rust
+pub(crate) struct SchemaElement<'a> {
+    pub(crate) r#type: Type, // here we've changed the name `type` to `r#type` to avoid reserved words
+    pub(crate) type_length: i32,
+    pub(crate) repetition_type: Repetition,
+    pub(crate) name: &'a str,
+    ...
+}
+```
+
+The lifetime annotations can also be added to list elements, as in
+
+```rust
+thrift_struct!(
+struct FileMetaData<'a> {
+  /** Version of this file **/
+  1: required i32 version
+  2: required list<'a> schema;
+  3: required i64 num_rows
+  4: required list<'a> row_groups
+  5: optional list key_value_metadata
+  6: optional string created_by
+  7: optional list column_orders;
+  8: optional EncryptionAlgorithm encryption_algorithm
+  9: optional binary footer_signing_key_metadata
+}
+);
+```
+
+Note that the lifetime annotation precedes the element type specification.
+
+## Serialization traits
+
+Serialization is performed via several Rust traits. On the deserialization side, objects implement
+the `ReadThrift` trait. This defines a `read_thrift` function that takes a
+`ThriftCompactInputProtocol` I/O object as an argument. The `read_thrift` function performs
+all steps necessary to deserialize the object from the input stream, and is usually produced by
+one of the macros mentioned above.
+
+On the serialization side, the `WriteThrift` and `WriteThriftField` traits are used in conjunction
+with a `ThriftCompactOutputProtocol` struct. As above, the Thrift macros produce the necessary
+implementations needed to perform serialization.
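+
+As a quick orientation before the details below, a round trip through these traits looks roughly
+like the following sketch (using `BloomFilterHeader`, whose implementations are macro-generated;
+module paths match this crate, but the function itself is illustrative):
+
+```rust
+use crate::errors::Result;
+use crate::parquet_thrift::{
+    ReadThrift, ThriftCompactOutputProtocol, ThriftSliceInputProtocol, WriteThrift,
+};
+
+fn roundtrip(header: &BloomFilterHeader) -> Result<BloomFilterHeader> {
+    // serialize into an in-memory buffer
+    let mut buf = Vec::new();
+    let mut out = ThriftCompactOutputProtocol::new(&mut buf);
+    header.write_thrift(&mut out)?;
+
+    // deserialize from the byte slice we just produced
+    let mut prot = ThriftSliceInputProtocol::new(&buf);
+    BloomFilterHeader::read_thrift(&mut prot)
+}
+```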
+
+While the macros can be used in most circumstances, sometimes more control is needed. The following
+sections provide information on how to provide custom implementations for the serialization
+traits.
+
+### ReadThrift Customization
+
+Thrift enums are serialized as a single `i32` value. The process of reading an enum is
+straightforward: read the enum discriminant, and then match on the possible values. For instance,
+reading the `ConvertedType` enum becomes:
+
+```rust
+impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for ConvertedType {
+    fn read_thrift(prot: &mut R) -> Result<Self> {
+        let val = prot.read_i32()?;
+        Ok(match val {
+            0 => Self::UTF8,
+            1 => Self::MAP,
+            2 => Self::MAP_KEY_VALUE,
+            ...
+            21 => Self::INTERVAL,
+            _ => return Err(general_err!("Unexpected ConvertedType {}", val)),
+        })
+    }
+}
+```
+
+The default behavior is to return an error when an unexpected value is encountered. One could,
+however, provide an `Unknown` variant if forward compatibility is needed in the case of an
+evolving enum.
+
+Deserializing structs is more involved, but still fairly easy. A Thrift struct is serialized as
+repeated `(field_id,field_type,field)` tuples. The `field_id` and `field_type` usually occupy a
+single byte, followed by the Thrift-encoded field. Because only 4 bits are available for the id,
+encoders will usually encode the id as a delta from the preceding field's id. If the delta would
+exceed 15, the `field_id` nibble is instead set to `0`, and the `field_id` is encoded as a varint
+following the `field_type`. Fields are generally read in a loop, with the `field_id` and
+`field_type` read first, and then the `field_id` used to determine which field to read. When a
+field header of `0` (`FieldType::Stop`) is encountered, this marks the end of the struct and
+processing ceases. Here is an example of the processing loop:
+
+```rust
+    let mut last_field_id = 0i16;
+    loop {
+        // read the field id and field type. break if we encounter `Stop`
+        let field_ident = prot.read_field_begin(last_field_id)?;
+        if field_ident.field_type == FieldType::Stop {
+            break;
+        }
+        // match on the field id
+        match field_ident.id {
+            1 => {
+                let val = i32::read_thrift(&mut *prot)?;
+                num_values = Some(val);
+            }
+            2 => {
+                let val = Encoding::read_thrift(&mut *prot)?;
+                encoding = Some(val);
+            }
+            3 => {
+                let val = Encoding::read_thrift(&mut *prot)?;
+                definition_level_encoding = Some(val);
+            }
+            4 => {
+                let val = Encoding::read_thrift(&mut *prot)?;
+                repetition_level_encoding = Some(val);
+            }
+            // Thrift structs are meant to be forward compatible, so do not error
+            // here. Instead, simply skip unknown fields.
+            _ => {
+                prot.skip(field_ident.field_type)?;
+            }
+        };
+        // set the last seen field id to calculate the next field_id
+        last_field_id = field_ident.id;
+    }
+```
+
+Thrift unions are encoded as structs, but only a single field will be encoded. The loop above
+can be eliminated, and only the `match` on the id performed. A subsequent call to
+`read_field_begin` must return `Stop`, or an error should be returned. Here's an example from
+the decoding of the `LogicalType` union:
+
+```rust
+    // read the discriminant, error if it is `0`
+    let field_ident = prot.read_field_begin(0)?;
+    if field_ident.field_type == FieldType::Stop {
+        return Err(general_err!("received empty union from remote LogicalType"));
+    }
+    let ret = match field_ident.id {
+        1 => {
+            prot.skip_empty_struct()?;
+            Self::String
+        }
+        ...
+        _ => {
+            // LogicalType needs to be forward compatible, so we have defined an `_Unknown`
+            // variant for it. This can return an error if forward compatibility is not desired.
+            prot.skip(field_ident.field_type)?;
+            Self::_Unknown {
+                field_id: field_ident.id,
+            }
+        }
+    };
+    // test to ensure there is only one field present
+    let field_ident = prot.read_field_begin(field_ident.id)?;
+    if field_ident.field_type != FieldType::Stop {
+        return Err(general_err!(
+            "Received multiple fields for union from remote LogicalType"
+        ));
+    }
+```
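+
+Putting these pieces together, a complete hand-written `ReadThrift` implementation for a
+hypothetical two-field struct might look like the following sketch (`MyStruct` and its fields are
+placeholders, not types from this crate):
+
+```rust
+struct MyStruct {
+    f1: i32,              // required in the IDL
+    f2: Option<Encoding>, // optional in the IDL
+}
+
+impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for MyStruct {
+    fn read_thrift(prot: &mut R) -> Result<Self> {
+        let mut f1: Option<i32> = None;
+        let mut f2: Option<Encoding> = None;
+        let mut last_field_id = 0i16;
+        loop {
+            let field_ident = prot.read_field_begin(last_field_id)?;
+            if field_ident.field_type == FieldType::Stop {
+                break;
+            }
+            match field_ident.id {
+                1 => f1 = Some(i32::read_thrift(&mut *prot)?),
+                2 => f2 = Some(Encoding::read_thrift(&mut *prot)?),
+                // skip unknown fields for forward compatibility
+                _ => prot.skip(field_ident.field_type)?,
+            }
+            last_field_id = field_ident.id;
+        }
+        Ok(MyStruct {
+            // required fields must have been seen; error otherwise
+            f1: f1.ok_or_else(|| general_err!("Required field f1 is missing"))?,
+            f2,
+        })
+    }
+}
+```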
+
+### WriteThrift Customization
+
+On the serialization side, there are two traits to implement. The first, `WriteThrift`, is used
+for actually serializing the object. The other, `WriteThriftField`, handles serializing objects
+as struct fields.
+
+Serializing enums is as simple as writing the discriminant as an `i32`. For example, here is the
+custom serialization code for `ConvertedType`:
+
+```rust
+impl WriteThrift for ConvertedType {
+    const ELEMENT_TYPE: ElementType = ElementType::I32;
+
+    fn write_thrift(&self, writer: &mut ThriftCompactOutputProtocol) -> Result<()> {
+        // because we've added NONE, the variant values are off by 1, so correct that here
+        writer.write_i32(*self as i32 - 1)
+    }
+}
+```
+
+Structs and unions are serialized field by field. When performing the serialization, one needs to
+keep track of the last field that has been written, as this is needed to calculate the delta in
+the Thrift field header. For required fields this is not strictly necessary, but when writing
+optional fields it is. A typical `write_thrift` implementation will look like:
+
+```rust
+    fn write_thrift(&self, writer: &mut ThriftCompactOutputProtocol) -> Result<()> {
+        // required field f1
+        self.f1.write_thrift_field(writer, 1, 0)?; // field_id == 1, last_field_id == 0
+        // required field f2
+        self.f2.write_thrift_field(writer, 2, 1)?; // field_id == 2, last_field_id == 1
+        // final required field f3, we now save the last_field_id, which is returned by write_thrift_field
+        let mut last_field_id = self.f3.write_thrift_field(writer, 3, 2)?; // field_id == 3, last_field_id == 2
+
+        // optional field f4
+        if let Some(val) = self.f4.as_ref() {
+            last_field_id = val.write_thrift_field(writer, 4, last_field_id)?;
+        }
+        // optional field f5
+        if let Some(val) = self.f5.as_ref() {
+            last_field_id = val.write_thrift_field(writer, 5, last_field_id)?;
+        }
+        // write end of struct
+        writer.write_struct_end()
+    }
+```
+
+### Handling for lists
+
+Lists of serialized objects can usually be read using `parquet_thrift::read_thrift_vec` and written
+using the `WriteThrift::write_thrift` implementation for vectors of objects that implement
+`WriteThrift`.
+
+When reading a list, one first reads the list header, which provides the number of encoded
+elements, and then reads the elements one at a time.
+
+```rust
+    // read the list header
+    let list_ident = prot.read_list_begin()?;
+    // allocate a vector with enough capacity
+    let mut page_locations = Vec::with_capacity(list_ident.size as usize);
+    // read elements
+    for _ in 0..list_ident.size {
+        page_locations.push(read_page_location(prot)?);
+    }
+```
+
+Writing is simply the reverse: write the list header, and then serialize the elements:
+
+```rust
+    // write the list header
+    writer.write_list_begin(ElementType::Struct, page_locations.len())?;
+    // write the elements
+    for loc in &page_locations {
+        loc.write_thrift(writer)?;
+    }
+```
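+
+The same pattern applies to lists of non-struct elements. For instance, a `Vec<Encoding>` could be
+written as follows (a sketch combining the calls shown above; `encodings` is a hypothetical
+variable, and the element type comes from the `ELEMENT_TYPE` constant on the `WriteThrift` impl):
+
+```rust
+    // write the list header, declaring the element type and count
+    writer.write_list_begin(Encoding::ELEMENT_TYPE, encodings.len())?;
+    // serialize each enum element as its i32 discriminant
+    for e in &encodings {
+        e.write_thrift(writer)?;
+    }
+```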
+
+## More examples
+
+For more examples, the easiest thing to do is to [expand](https://github.com/dtolnay/cargo-expand)
+the thrift macros. For instance, to see the implementations generated in the `basic` module, type:
+
+```sh
+% cargo expand -p parquet --lib --all-features basic
+```
diff --git a/parquet/src/arrow/schema/extension.rs b/parquet/src/arrow/schema/extension.rs
index b4ccac773e44..bdd3d91eb97f 100644
--- a/parquet/src/arrow/schema/extension.rs
+++ b/parquet/src/arrow/schema/extension.rs
@@ -89,7 +89,9 @@ pub(crate) fn logical_type_for_struct(field: &Field) -> Option {
         return None;
     }
     match field.try_extension_type::() {
-        Ok(VariantType) => Some(LogicalType::Variant { specification_version: None }),
+        Ok(VariantType) => Some(LogicalType::Variant {
+            specification_version: None,
+        }),
         // Given check above, this should not error, but if it does ignore
         Err(_e) => None,
     }
diff --git a/parquet/src/basic.rs b/parquet/src/basic.rs
index f9150a7e8685..350f2b6de1e2 100644
--- a/parquet/src/basic.rs
+++ b/parquet/src/basic.rs
@@ -37,7 +37,7 @@ use crate::errors::{ParquetError, Result};
 // Types from the Thrift definition
 // ----------------------------------------------------------------------
 
-// Mirrors thrift enum `crate::format::Type`
+// Mirrors thrift enum `Type`
 thrift_enum!(
 /// Types supported by Parquet.
/// @@ -893,7 +899,7 @@ impl FromStr for Compression { } // ---------------------------------------------------------------------- -// Mirrors thrift enum `crate::format::PageType` +// Mirrors thrift enum `PageType` thrift_enum!( /// Available data pages for Parquet file format. @@ -907,7 +913,7 @@ enum PageType { ); // ---------------------------------------------------------------------- -// Mirrors thrift enum `crate::format::BoundaryOrder` +// Mirrors thrift enum `BoundaryOrder` thrift_enum!( /// Enum to annotate whether lists of min/max elements inside ColumnIndex @@ -920,7 +926,7 @@ enum BoundaryOrder { ); // ---------------------------------------------------------------------- -// Mirrors thrift enum `crate::format::EdgeInterpolationAlgorithm` +// Mirrors thrift enum `EdgeInterpolationAlgorithm` thrift_enum!( /// Edge interpolation algorithm for Geography logical type @@ -934,7 +940,7 @@ enum EdgeInterpolationAlgorithm { ); // ---------------------------------------------------------------------- -// Mirrors thrift union `crate::format::BloomFilterAlgorithm` +// Mirrors thrift union `BloomFilterAlgorithm` thrift_union_all_empty!( /// The algorithm used in Bloom filter. @@ -945,7 +951,7 @@ union BloomFilterAlgorithm { ); // ---------------------------------------------------------------------- -// Mirrors thrift union `crate::format::BloomFilterHash` +// Mirrors thrift union `BloomFilterHash` thrift_union_all_empty!( /// The hash function used in Bloom filter. This function takes the hash of a column value @@ -957,7 +963,7 @@ union BloomFilterHash { ); // ---------------------------------------------------------------------- -// Mirrors thrift union `crate::format::BloomFilterCompression` +// Mirrors thrift union `BloomFilterCompression` thrift_union_all_empty!( /// The compression used in the Bloom filter. @@ -967,7 +973,7 @@ union BloomFilterCompression { ); // ---------------------------------------------------------------------- -// Mirrors thrift union `crate::format::ColumnOrder` +// Mirrors thrift union `ColumnOrder` /// Sort order for page and column statistics. 
/// @@ -1188,181 +1194,6 @@ impl fmt::Display for ColumnOrder { } } -// ---------------------------------------------------------------------- -// crate::format::ConvertedType <=> ConvertedType conversion - -impl TryFrom> for ConvertedType { - type Error = ParquetError; - - fn try_from(option: Option) -> Result { - Ok(match option { - None => ConvertedType::NONE, - Some(value) => match value { - crate::format::ConvertedType::UTF8 => ConvertedType::UTF8, - crate::format::ConvertedType::MAP => ConvertedType::MAP, - crate::format::ConvertedType::MAP_KEY_VALUE => ConvertedType::MAP_KEY_VALUE, - crate::format::ConvertedType::LIST => ConvertedType::LIST, - crate::format::ConvertedType::ENUM => ConvertedType::ENUM, - crate::format::ConvertedType::DECIMAL => ConvertedType::DECIMAL, - crate::format::ConvertedType::DATE => ConvertedType::DATE, - crate::format::ConvertedType::TIME_MILLIS => ConvertedType::TIME_MILLIS, - crate::format::ConvertedType::TIME_MICROS => ConvertedType::TIME_MICROS, - crate::format::ConvertedType::TIMESTAMP_MILLIS => ConvertedType::TIMESTAMP_MILLIS, - crate::format::ConvertedType::TIMESTAMP_MICROS => ConvertedType::TIMESTAMP_MICROS, - crate::format::ConvertedType::UINT_8 => ConvertedType::UINT_8, - crate::format::ConvertedType::UINT_16 => ConvertedType::UINT_16, - crate::format::ConvertedType::UINT_32 => ConvertedType::UINT_32, - crate::format::ConvertedType::UINT_64 => ConvertedType::UINT_64, - crate::format::ConvertedType::INT_8 => ConvertedType::INT_8, - crate::format::ConvertedType::INT_16 => ConvertedType::INT_16, - crate::format::ConvertedType::INT_32 => ConvertedType::INT_32, - crate::format::ConvertedType::INT_64 => ConvertedType::INT_64, - crate::format::ConvertedType::JSON => ConvertedType::JSON, - crate::format::ConvertedType::BSON => ConvertedType::BSON, - crate::format::ConvertedType::INTERVAL => ConvertedType::INTERVAL, - _ => { - return Err(general_err!( - "unexpected parquet converted type: {}", - value.0 - )) - } - }, - }) - } -} - -impl From for Option { - fn from(value: ConvertedType) -> Self { - match value { - ConvertedType::NONE => None, - ConvertedType::UTF8 => Some(crate::format::ConvertedType::UTF8), - ConvertedType::MAP => Some(crate::format::ConvertedType::MAP), - ConvertedType::MAP_KEY_VALUE => Some(crate::format::ConvertedType::MAP_KEY_VALUE), - ConvertedType::LIST => Some(crate::format::ConvertedType::LIST), - ConvertedType::ENUM => Some(crate::format::ConvertedType::ENUM), - ConvertedType::DECIMAL => Some(crate::format::ConvertedType::DECIMAL), - ConvertedType::DATE => Some(crate::format::ConvertedType::DATE), - ConvertedType::TIME_MILLIS => Some(crate::format::ConvertedType::TIME_MILLIS), - ConvertedType::TIME_MICROS => Some(crate::format::ConvertedType::TIME_MICROS), - ConvertedType::TIMESTAMP_MILLIS => Some(crate::format::ConvertedType::TIMESTAMP_MILLIS), - ConvertedType::TIMESTAMP_MICROS => Some(crate::format::ConvertedType::TIMESTAMP_MICROS), - ConvertedType::UINT_8 => Some(crate::format::ConvertedType::UINT_8), - ConvertedType::UINT_16 => Some(crate::format::ConvertedType::UINT_16), - ConvertedType::UINT_32 => Some(crate::format::ConvertedType::UINT_32), - ConvertedType::UINT_64 => Some(crate::format::ConvertedType::UINT_64), - ConvertedType::INT_8 => Some(crate::format::ConvertedType::INT_8), - ConvertedType::INT_16 => Some(crate::format::ConvertedType::INT_16), - ConvertedType::INT_32 => Some(crate::format::ConvertedType::INT_32), - ConvertedType::INT_64 => Some(crate::format::ConvertedType::INT_64), - ConvertedType::JSON => 
Some(crate::format::ConvertedType::JSON), - ConvertedType::BSON => Some(crate::format::ConvertedType::BSON), - ConvertedType::INTERVAL => Some(crate::format::ConvertedType::INTERVAL), - } - } -} - -// ---------------------------------------------------------------------- -// crate::format::LogicalType <=> LogicalType conversion - -impl From for LogicalType { - fn from(value: crate::format::LogicalType) -> Self { - match value { - crate::format::LogicalType::STRING(_) => LogicalType::String, - crate::format::LogicalType::MAP(_) => LogicalType::Map, - crate::format::LogicalType::LIST(_) => LogicalType::List, - crate::format::LogicalType::ENUM(_) => LogicalType::Enum, - crate::format::LogicalType::DECIMAL(t) => LogicalType::Decimal { - scale: t.scale, - precision: t.precision, - }, - crate::format::LogicalType::DATE(_) => LogicalType::Date, - crate::format::LogicalType::TIME(t) => LogicalType::Time { - is_adjusted_to_u_t_c: t.is_adjusted_to_u_t_c, - unit: t.unit.into(), - }, - crate::format::LogicalType::TIMESTAMP(t) => LogicalType::Timestamp { - is_adjusted_to_u_t_c: t.is_adjusted_to_u_t_c, - unit: t.unit.into(), - }, - crate::format::LogicalType::INTEGER(t) => LogicalType::Integer { - bit_width: t.bit_width, - is_signed: t.is_signed, - }, - crate::format::LogicalType::UNKNOWN(_) => LogicalType::Unknown, - crate::format::LogicalType::JSON(_) => LogicalType::Json, - crate::format::LogicalType::BSON(_) => LogicalType::Bson, - crate::format::LogicalType::UUID(_) => LogicalType::Uuid, - crate::format::LogicalType::FLOAT16(_) => LogicalType::Float16, - crate::format::LogicalType::VARIANT(vt) => LogicalType::Variant { - specification_version: vt.specification_version, - }, - crate::format::LogicalType::GEOMETRY(gt) => LogicalType::Geometry { crs: gt.crs }, - crate::format::LogicalType::GEOGRAPHY(gt) => LogicalType::Geography { - crs: gt.crs, - algorithm: gt.algorithm.map(|a| a.try_into().unwrap()), - }, - } - } -} - -impl From for crate::format::LogicalType { - fn from(value: LogicalType) -> Self { - match value { - LogicalType::String => crate::format::LogicalType::STRING(Default::default()), - LogicalType::Map => crate::format::LogicalType::MAP(Default::default()), - LogicalType::List => crate::format::LogicalType::LIST(Default::default()), - LogicalType::Enum => crate::format::LogicalType::ENUM(Default::default()), - LogicalType::Decimal { scale, precision } => { - crate::format::LogicalType::DECIMAL(crate::format::DecimalType { scale, precision }) - } - LogicalType::Date => crate::format::LogicalType::DATE(Default::default()), - LogicalType::Time { - is_adjusted_to_u_t_c, - unit, - } => crate::format::LogicalType::TIME(crate::format::TimeType { - is_adjusted_to_u_t_c, - unit: unit.into(), - }), - LogicalType::Timestamp { - is_adjusted_to_u_t_c, - unit, - } => crate::format::LogicalType::TIMESTAMP(crate::format::TimestampType { - is_adjusted_to_u_t_c, - unit: unit.into(), - }), - LogicalType::Integer { - bit_width, - is_signed, - } => crate::format::LogicalType::INTEGER(crate::format::IntType { - bit_width, - is_signed, - }), - LogicalType::Unknown => crate::format::LogicalType::UNKNOWN(Default::default()), - LogicalType::Json => crate::format::LogicalType::JSON(Default::default()), - LogicalType::Bson => crate::format::LogicalType::BSON(Default::default()), - LogicalType::Uuid => crate::format::LogicalType::UUID(Default::default()), - LogicalType::Float16 => crate::format::LogicalType::FLOAT16(Default::default()), - LogicalType::Variant { - specification_version, - } => 
crate::format::LogicalType::VARIANT(crate::format::VariantType { - specification_version, - }), - LogicalType::Geometry { crs } => { - crate::format::LogicalType::GEOMETRY(crate::format::GeometryType { crs }) - } - LogicalType::Geography { crs, algorithm } => { - crate::format::LogicalType::GEOGRAPHY(crate::format::GeographyType { - crs, - algorithm: algorithm.map(|a| a.into()), - }) - } - LogicalType::_Unknown { .. } => { - panic!("Trying to convert unknown LogicalType to thrift"); - } - } - } -} - // ---------------------------------------------------------------------- // LogicalType <=> ConvertedType conversion @@ -1423,47 +1254,6 @@ impl From> for ConvertedType { } } -// ---------------------------------------------------------------------- -// crate::format::CompressionCodec <=> Compression conversion - -impl TryFrom for Compression { - type Error = ParquetError; - - fn try_from(value: crate::format::CompressionCodec) -> Result { - Ok(match value { - crate::format::CompressionCodec::UNCOMPRESSED => Compression::UNCOMPRESSED, - crate::format::CompressionCodec::SNAPPY => Compression::SNAPPY, - crate::format::CompressionCodec::GZIP => Compression::GZIP(Default::default()), - crate::format::CompressionCodec::LZO => Compression::LZO, - crate::format::CompressionCodec::BROTLI => Compression::BROTLI(Default::default()), - crate::format::CompressionCodec::LZ4 => Compression::LZ4, - crate::format::CompressionCodec::ZSTD => Compression::ZSTD(Default::default()), - crate::format::CompressionCodec::LZ4_RAW => Compression::LZ4_RAW, - _ => { - return Err(general_err!( - "unexpected parquet compression codec: {}", - value.0 - )) - } - }) - } -} - -impl From for crate::format::CompressionCodec { - fn from(value: Compression) -> Self { - match value { - Compression::UNCOMPRESSED => crate::format::CompressionCodec::UNCOMPRESSED, - Compression::SNAPPY => crate::format::CompressionCodec::SNAPPY, - Compression::GZIP(_) => crate::format::CompressionCodec::GZIP, - Compression::LZO => crate::format::CompressionCodec::LZO, - Compression::BROTLI(_) => crate::format::CompressionCodec::BROTLI, - Compression::LZ4 => crate::format::CompressionCodec::LZ4, - Compression::ZSTD(_) => crate::format::CompressionCodec::ZSTD, - Compression::LZ4_RAW => crate::format::CompressionCodec::LZ4_RAW, - } - } -} - // ---------------------------------------------------------------------- // String conversions for schema parsing. 
@@ -1580,7 +1370,7 @@ impl str::FromStr for LogicalType { #[allow(deprecated)] // allow BIT_PACKED encoding for the whole test module mod tests { use super::*; - use crate::parquet_thrift::tests::test_roundtrip; + use crate::parquet_thrift::{tests::test_roundtrip, ThriftSliceInputProtocol}; #[test] fn test_display_type() { @@ -1597,57 +1387,6 @@ mod tests { ); } - #[test] - fn test_from_type() { - assert_eq!( - Type::try_from(crate::format::Type::BOOLEAN).unwrap(), - Type::BOOLEAN - ); - assert_eq!( - Type::try_from(crate::format::Type::INT32).unwrap(), - Type::INT32 - ); - assert_eq!( - Type::try_from(crate::format::Type::INT64).unwrap(), - Type::INT64 - ); - assert_eq!( - Type::try_from(crate::format::Type::INT96).unwrap(), - Type::INT96 - ); - assert_eq!( - Type::try_from(crate::format::Type::FLOAT).unwrap(), - Type::FLOAT - ); - assert_eq!( - Type::try_from(crate::format::Type::DOUBLE).unwrap(), - Type::DOUBLE - ); - assert_eq!( - Type::try_from(crate::format::Type::BYTE_ARRAY).unwrap(), - Type::BYTE_ARRAY - ); - assert_eq!( - Type::try_from(crate::format::Type::FIXED_LEN_BYTE_ARRAY).unwrap(), - Type::FIXED_LEN_BYTE_ARRAY - ); - } - - #[test] - fn test_into_type() { - assert_eq!(crate::format::Type::BOOLEAN, Type::BOOLEAN.into()); - assert_eq!(crate::format::Type::INT32, Type::INT32.into()); - assert_eq!(crate::format::Type::INT64, Type::INT64.into()); - assert_eq!(crate::format::Type::INT96, Type::INT96.into()); - assert_eq!(crate::format::Type::FLOAT, Type::FLOAT.into()); - assert_eq!(crate::format::Type::DOUBLE, Type::DOUBLE.into()); - assert_eq!(crate::format::Type::BYTE_ARRAY, Type::BYTE_ARRAY.into()); - assert_eq!( - crate::format::Type::FIXED_LEN_BYTE_ARRAY, - Type::FIXED_LEN_BYTE_ARRAY.into() - ); - } - #[test] fn test_from_string_into_type() { assert_eq!( @@ -1714,6 +1453,17 @@ mod tests { test_roundtrip(ConvertedType::INTERVAL); } + #[test] + fn test_read_invalid_converted_type() { + let mut prot = ThriftSliceInputProtocol::new(&[0x7eu8]); + let res = ConvertedType::read_thrift(&mut prot); + assert!(res.is_err()); + assert_eq!( + res.unwrap_err().to_string(), + "Parquet error: Unexpected ConvertedType 63" + ); + } + #[test] fn test_display_converted_type() { assert_eq!(ConvertedType::NONE.to_string(), "NONE"); @@ -1749,205 +1499,6 @@ mod tests { assert_eq!(ConvertedType::DECIMAL.to_string(), "DECIMAL") } - #[test] - fn test_from_converted_type() { - let parquet_conv_none: Option = None; - assert_eq!( - ConvertedType::try_from(parquet_conv_none).unwrap(), - ConvertedType::NONE - ); - assert_eq!( - ConvertedType::try_from(Some(crate::format::ConvertedType::UTF8)).unwrap(), - ConvertedType::UTF8 - ); - assert_eq!( - ConvertedType::try_from(Some(crate::format::ConvertedType::MAP)).unwrap(), - ConvertedType::MAP - ); - assert_eq!( - ConvertedType::try_from(Some(crate::format::ConvertedType::MAP_KEY_VALUE)).unwrap(), - ConvertedType::MAP_KEY_VALUE - ); - assert_eq!( - ConvertedType::try_from(Some(crate::format::ConvertedType::LIST)).unwrap(), - ConvertedType::LIST - ); - assert_eq!( - ConvertedType::try_from(Some(crate::format::ConvertedType::ENUM)).unwrap(), - ConvertedType::ENUM - ); - assert_eq!( - ConvertedType::try_from(Some(crate::format::ConvertedType::DECIMAL)).unwrap(), - ConvertedType::DECIMAL - ); - assert_eq!( - ConvertedType::try_from(Some(crate::format::ConvertedType::DATE)).unwrap(), - ConvertedType::DATE - ); - assert_eq!( - ConvertedType::try_from(Some(crate::format::ConvertedType::TIME_MILLIS)).unwrap(), - ConvertedType::TIME_MILLIS - ); - assert_eq!( - 
ConvertedType::try_from(Some(crate::format::ConvertedType::TIME_MICROS)).unwrap(), - ConvertedType::TIME_MICROS - ); - assert_eq!( - ConvertedType::try_from(Some(crate::format::ConvertedType::TIMESTAMP_MILLIS)).unwrap(), - ConvertedType::TIMESTAMP_MILLIS - ); - assert_eq!( - ConvertedType::try_from(Some(crate::format::ConvertedType::TIMESTAMP_MICROS)).unwrap(), - ConvertedType::TIMESTAMP_MICROS - ); - assert_eq!( - ConvertedType::try_from(Some(crate::format::ConvertedType::UINT_8)).unwrap(), - ConvertedType::UINT_8 - ); - assert_eq!( - ConvertedType::try_from(Some(crate::format::ConvertedType::UINT_16)).unwrap(), - ConvertedType::UINT_16 - ); - assert_eq!( - ConvertedType::try_from(Some(crate::format::ConvertedType::UINT_32)).unwrap(), - ConvertedType::UINT_32 - ); - assert_eq!( - ConvertedType::try_from(Some(crate::format::ConvertedType::UINT_64)).unwrap(), - ConvertedType::UINT_64 - ); - assert_eq!( - ConvertedType::try_from(Some(crate::format::ConvertedType::INT_8)).unwrap(), - ConvertedType::INT_8 - ); - assert_eq!( - ConvertedType::try_from(Some(crate::format::ConvertedType::INT_16)).unwrap(), - ConvertedType::INT_16 - ); - assert_eq!( - ConvertedType::try_from(Some(crate::format::ConvertedType::INT_32)).unwrap(), - ConvertedType::INT_32 - ); - assert_eq!( - ConvertedType::try_from(Some(crate::format::ConvertedType::INT_64)).unwrap(), - ConvertedType::INT_64 - ); - assert_eq!( - ConvertedType::try_from(Some(crate::format::ConvertedType::JSON)).unwrap(), - ConvertedType::JSON - ); - assert_eq!( - ConvertedType::try_from(Some(crate::format::ConvertedType::BSON)).unwrap(), - ConvertedType::BSON - ); - assert_eq!( - ConvertedType::try_from(Some(crate::format::ConvertedType::INTERVAL)).unwrap(), - ConvertedType::INTERVAL - ); - assert_eq!( - ConvertedType::try_from(Some(crate::format::ConvertedType::DECIMAL)).unwrap(), - ConvertedType::DECIMAL - ) - } - - #[test] - fn test_into_converted_type() { - let converted_type: Option = None; - assert_eq!(converted_type, ConvertedType::NONE.into()); - assert_eq!( - Some(crate::format::ConvertedType::UTF8), - ConvertedType::UTF8.into() - ); - assert_eq!( - Some(crate::format::ConvertedType::MAP), - ConvertedType::MAP.into() - ); - assert_eq!( - Some(crate::format::ConvertedType::MAP_KEY_VALUE), - ConvertedType::MAP_KEY_VALUE.into() - ); - assert_eq!( - Some(crate::format::ConvertedType::LIST), - ConvertedType::LIST.into() - ); - assert_eq!( - Some(crate::format::ConvertedType::ENUM), - ConvertedType::ENUM.into() - ); - assert_eq!( - Some(crate::format::ConvertedType::DECIMAL), - ConvertedType::DECIMAL.into() - ); - assert_eq!( - Some(crate::format::ConvertedType::DATE), - ConvertedType::DATE.into() - ); - assert_eq!( - Some(crate::format::ConvertedType::TIME_MILLIS), - ConvertedType::TIME_MILLIS.into() - ); - assert_eq!( - Some(crate::format::ConvertedType::TIME_MICROS), - ConvertedType::TIME_MICROS.into() - ); - assert_eq!( - Some(crate::format::ConvertedType::TIMESTAMP_MILLIS), - ConvertedType::TIMESTAMP_MILLIS.into() - ); - assert_eq!( - Some(crate::format::ConvertedType::TIMESTAMP_MICROS), - ConvertedType::TIMESTAMP_MICROS.into() - ); - assert_eq!( - Some(crate::format::ConvertedType::UINT_8), - ConvertedType::UINT_8.into() - ); - assert_eq!( - Some(crate::format::ConvertedType::UINT_16), - ConvertedType::UINT_16.into() - ); - assert_eq!( - Some(crate::format::ConvertedType::UINT_32), - ConvertedType::UINT_32.into() - ); - assert_eq!( - Some(crate::format::ConvertedType::UINT_64), - ConvertedType::UINT_64.into() - ); - assert_eq!( - 
Some(crate::format::ConvertedType::INT_8), - ConvertedType::INT_8.into() - ); - assert_eq!( - Some(crate::format::ConvertedType::INT_16), - ConvertedType::INT_16.into() - ); - assert_eq!( - Some(crate::format::ConvertedType::INT_32), - ConvertedType::INT_32.into() - ); - assert_eq!( - Some(crate::format::ConvertedType::INT_64), - ConvertedType::INT_64.into() - ); - assert_eq!( - Some(crate::format::ConvertedType::JSON), - ConvertedType::JSON.into() - ); - assert_eq!( - Some(crate::format::ConvertedType::BSON), - ConvertedType::BSON.into() - ); - assert_eq!( - Some(crate::format::ConvertedType::INTERVAL), - ConvertedType::INTERVAL.into() - ); - assert_eq!( - Some(crate::format::ConvertedType::DECIMAL), - ConvertedType::DECIMAL.into() - ) - } - #[test] fn test_from_string_into_converted_type() { assert_eq!( @@ -2361,38 +1912,6 @@ mod tests { assert_eq!(Repetition::REPEATED.to_string(), "REPEATED"); } - #[test] - fn test_from_repetition() { - assert_eq!( - Repetition::try_from(crate::format::FieldRepetitionType::REQUIRED).unwrap(), - Repetition::REQUIRED - ); - assert_eq!( - Repetition::try_from(crate::format::FieldRepetitionType::OPTIONAL).unwrap(), - Repetition::OPTIONAL - ); - assert_eq!( - Repetition::try_from(crate::format::FieldRepetitionType::REPEATED).unwrap(), - Repetition::REPEATED - ); - } - - #[test] - fn test_into_repetition() { - assert_eq!( - crate::format::FieldRepetitionType::REQUIRED, - Repetition::REQUIRED.into() - ); - assert_eq!( - crate::format::FieldRepetitionType::OPTIONAL, - Repetition::OPTIONAL.into() - ); - assert_eq!( - crate::format::FieldRepetitionType::REPEATED, - Repetition::REPEATED.into() - ); - } - #[test] fn test_from_string_into_repetition() { assert_eq!( @@ -2436,64 +1955,6 @@ mod tests { assert_eq!(Encoding::RLE_DICTIONARY.to_string(), "RLE_DICTIONARY"); } - #[test] - fn test_from_encoding() { - assert_eq!( - Encoding::try_from(crate::format::Encoding::PLAIN).unwrap(), - Encoding::PLAIN - ); - assert_eq!( - Encoding::try_from(crate::format::Encoding::PLAIN_DICTIONARY).unwrap(), - Encoding::PLAIN_DICTIONARY - ); - assert_eq!( - Encoding::try_from(crate::format::Encoding::RLE).unwrap(), - Encoding::RLE - ); - assert_eq!( - Encoding::try_from(crate::format::Encoding::BIT_PACKED).unwrap(), - Encoding::BIT_PACKED - ); - assert_eq!( - Encoding::try_from(crate::format::Encoding::DELTA_BINARY_PACKED).unwrap(), - Encoding::DELTA_BINARY_PACKED - ); - assert_eq!( - Encoding::try_from(crate::format::Encoding::DELTA_LENGTH_BYTE_ARRAY).unwrap(), - Encoding::DELTA_LENGTH_BYTE_ARRAY - ); - assert_eq!( - Encoding::try_from(crate::format::Encoding::DELTA_BYTE_ARRAY).unwrap(), - Encoding::DELTA_BYTE_ARRAY - ); - } - - #[test] - fn test_into_encoding() { - assert_eq!(crate::format::Encoding::PLAIN, Encoding::PLAIN.into()); - assert_eq!( - crate::format::Encoding::PLAIN_DICTIONARY, - Encoding::PLAIN_DICTIONARY.into() - ); - assert_eq!(crate::format::Encoding::RLE, Encoding::RLE.into()); - assert_eq!( - crate::format::Encoding::BIT_PACKED, - Encoding::BIT_PACKED.into() - ); - assert_eq!( - crate::format::Encoding::DELTA_BINARY_PACKED, - Encoding::DELTA_BINARY_PACKED.into() - ); - assert_eq!( - crate::format::Encoding::DELTA_LENGTH_BYTE_ARRAY, - Encoding::DELTA_LENGTH_BYTE_ARRAY.into() - ); - assert_eq!( - crate::format::Encoding::DELTA_BYTE_ARRAY, - Encoding::DELTA_BYTE_ARRAY.into() - ); - } - #[test] fn test_compression_codec_to_string() { assert_eq!(Compression::UNCOMPRESSED.codec_to_string(), "UNCOMPRESSED"); @@ -2523,70 +1984,6 @@ mod tests { ); } - #[test] - fn 
test_from_compression() { - assert_eq!( - Compression::try_from(crate::format::CompressionCodec::UNCOMPRESSED).unwrap(), - Compression::UNCOMPRESSED - ); - assert_eq!( - Compression::try_from(crate::format::CompressionCodec::SNAPPY).unwrap(), - Compression::SNAPPY - ); - assert_eq!( - Compression::try_from(crate::format::CompressionCodec::GZIP).unwrap(), - Compression::GZIP(Default::default()) - ); - assert_eq!( - Compression::try_from(crate::format::CompressionCodec::LZO).unwrap(), - Compression::LZO - ); - assert_eq!( - Compression::try_from(crate::format::CompressionCodec::BROTLI).unwrap(), - Compression::BROTLI(Default::default()) - ); - assert_eq!( - Compression::try_from(crate::format::CompressionCodec::LZ4).unwrap(), - Compression::LZ4 - ); - assert_eq!( - Compression::try_from(crate::format::CompressionCodec::ZSTD).unwrap(), - Compression::ZSTD(Default::default()) - ); - } - - #[test] - fn test_into_compression() { - assert_eq!( - crate::format::CompressionCodec::UNCOMPRESSED, - Compression::UNCOMPRESSED.into() - ); - assert_eq!( - crate::format::CompressionCodec::SNAPPY, - Compression::SNAPPY.into() - ); - assert_eq!( - crate::format::CompressionCodec::GZIP, - Compression::GZIP(Default::default()).into() - ); - assert_eq!( - crate::format::CompressionCodec::LZO, - Compression::LZO.into() - ); - assert_eq!( - crate::format::CompressionCodec::BROTLI, - Compression::BROTLI(Default::default()).into() - ); - assert_eq!( - crate::format::CompressionCodec::LZ4, - Compression::LZ4.into() - ); - assert_eq!( - crate::format::CompressionCodec::ZSTD, - Compression::ZSTD(Default::default()).into() - ); - } - #[test] fn test_display_page_type() { assert_eq!(PageType::DATA_PAGE.to_string(), "DATA_PAGE"); @@ -2595,46 +1992,6 @@ mod tests { assert_eq!(PageType::DATA_PAGE_V2.to_string(), "DATA_PAGE_V2"); } - #[test] - fn test_from_page_type() { - assert_eq!( - PageType::try_from(crate::format::PageType::DATA_PAGE).unwrap(), - PageType::DATA_PAGE - ); - assert_eq!( - PageType::try_from(crate::format::PageType::INDEX_PAGE).unwrap(), - PageType::INDEX_PAGE - ); - assert_eq!( - PageType::try_from(crate::format::PageType::DICTIONARY_PAGE).unwrap(), - PageType::DICTIONARY_PAGE - ); - assert_eq!( - PageType::try_from(crate::format::PageType::DATA_PAGE_V2).unwrap(), - PageType::DATA_PAGE_V2 - ); - } - - #[test] - fn test_into_page_type() { - assert_eq!( - crate::format::PageType::DATA_PAGE, - PageType::DATA_PAGE.into() - ); - assert_eq!( - crate::format::PageType::INDEX_PAGE, - PageType::INDEX_PAGE.into() - ); - assert_eq!( - crate::format::PageType::DICTIONARY_PAGE, - PageType::DICTIONARY_PAGE.into() - ); - assert_eq!( - crate::format::PageType::DATA_PAGE_V2, - PageType::DATA_PAGE_V2.into() - ); - } - #[test] fn test_display_sort_order() { assert_eq!(SortOrder::SIGNED.to_string(), "SIGNED"); @@ -2953,38 +2310,6 @@ mod tests { assert_eq!(BoundaryOrder::UNORDERED.to_string(), "UNORDERED"); } - #[test] - fn test_from_boundary_order() { - assert_eq!( - BoundaryOrder::try_from(crate::format::BoundaryOrder::ASCENDING).unwrap(), - BoundaryOrder::ASCENDING - ); - assert_eq!( - BoundaryOrder::try_from(crate::format::BoundaryOrder::DESCENDING).unwrap(), - BoundaryOrder::DESCENDING - ); - assert_eq!( - BoundaryOrder::try_from(crate::format::BoundaryOrder::UNORDERED).unwrap(), - BoundaryOrder::UNORDERED - ); - } - - #[test] - fn test_into_boundary_order() { - assert_eq!( - crate::format::BoundaryOrder::ASCENDING, - BoundaryOrder::ASCENDING.into() - ); - assert_eq!( - crate::format::BoundaryOrder::DESCENDING, - 
BoundaryOrder::DESCENDING.into() - ); - assert_eq!( - crate::format::BoundaryOrder::UNORDERED, - BoundaryOrder::UNORDERED.into() - ); - } - #[test] fn test_display_edge_algo() { assert_eq!( @@ -2996,63 +2321,4 @@ mod tests { assert_eq!(EdgeInterpolationAlgorithm::ANDOYER.to_string(), "ANDOYER"); assert_eq!(EdgeInterpolationAlgorithm::KARNEY.to_string(), "KARNEY"); } - - #[test] - fn test_from_edge_algo() { - assert_eq!( - EdgeInterpolationAlgorithm::try_from( - crate::format::EdgeInterpolationAlgorithm::SPHERICAL - ) - .unwrap(), - EdgeInterpolationAlgorithm::SPHERICAL - ); - assert_eq!( - EdgeInterpolationAlgorithm::try_from( - crate::format::EdgeInterpolationAlgorithm::VINCENTY - ) - .unwrap(), - EdgeInterpolationAlgorithm::VINCENTY - ); - assert_eq!( - EdgeInterpolationAlgorithm::try_from(crate::format::EdgeInterpolationAlgorithm::THOMAS) - .unwrap(), - EdgeInterpolationAlgorithm::THOMAS - ); - assert_eq!( - EdgeInterpolationAlgorithm::try_from( - crate::format::EdgeInterpolationAlgorithm::ANDOYER - ) - .unwrap(), - EdgeInterpolationAlgorithm::ANDOYER - ); - assert_eq!( - EdgeInterpolationAlgorithm::try_from(crate::format::EdgeInterpolationAlgorithm::KARNEY) - .unwrap(), - EdgeInterpolationAlgorithm::KARNEY - ); - } - - #[test] - fn test_into_edge_algo() { - assert_eq!( - crate::format::EdgeInterpolationAlgorithm::SPHERICAL, - EdgeInterpolationAlgorithm::SPHERICAL.into() - ); - assert_eq!( - crate::format::EdgeInterpolationAlgorithm::VINCENTY, - EdgeInterpolationAlgorithm::VINCENTY.into() - ); - assert_eq!( - crate::format::EdgeInterpolationAlgorithm::THOMAS, - EdgeInterpolationAlgorithm::THOMAS.into() - ); - assert_eq!( - crate::format::EdgeInterpolationAlgorithm::ANDOYER, - EdgeInterpolationAlgorithm::ANDOYER.into() - ); - assert_eq!( - crate::format::EdgeInterpolationAlgorithm::KARNEY, - EdgeInterpolationAlgorithm::KARNEY.into() - ); - } } diff --git a/parquet/src/bin/parquet-layout.rs b/parquet/src/bin/parquet-layout.rs index 46a231a7d02b..6f589fab66ed 100644 --- a/parquet/src/bin/parquet-layout.rs +++ b/parquet/src/bin/parquet-layout.rs @@ -41,7 +41,7 @@ use parquet::file::metadata::ParquetMetaDataReader; use serde::Serialize; use thrift::protocol::TCompactInputProtocol; -use parquet::basic::{Compression, Encoding}; +use parquet::basic::Compression; use parquet::errors::Result; use parquet::file::reader::ChunkReader; use parquet::format::PageHeader; @@ -105,7 +105,7 @@ fn do_layout(reader: &C) -> Result { if let Some(dictionary) = header.dictionary_page_header { pages.push(Page { compression, - encoding: encoding(dictionary.encoding), + encoding: encoding(dictionary.encoding.0), page_type: "dictionary", offset: start, compressed_bytes: header.compressed_page_size, @@ -116,7 +116,7 @@ fn do_layout(reader: &C) -> Result { } else if let Some(data_page) = header.data_page_header { pages.push(Page { compression, - encoding: encoding(data_page.encoding), + encoding: encoding(data_page.encoding.0), page_type: "data_page_v1", offset: start, compressed_bytes: header.compressed_page_size, @@ -129,7 +129,7 @@ fn do_layout(reader: &C) -> Result { pages.push(Page { compression: compression.filter(|_| is_compressed), - encoding: encoding(data_page.encoding), + encoding: encoding(data_page.encoding.0), page_type: "data_page_v2", offset: start, compressed_bytes: header.compressed_page_size, @@ -196,19 +196,19 @@ fn compression(compression: Compression) -> Option<&'static str> { } /// Returns a string representation for a given encoding -fn encoding(encoding: parquet::format::Encoding) -> &'static 
str { - match Encoding::try_from(encoding) { - Ok(Encoding::PLAIN) => "plain", - Ok(Encoding::PLAIN_DICTIONARY) => "plain_dictionary", - Ok(Encoding::RLE) => "rle", +fn encoding(encoding: i32) -> &'static str { + match encoding { + 0 => "plain", + 2 => "plain_dictionary", + 3 => "rle", #[allow(deprecated)] - Ok(Encoding::BIT_PACKED) => "bit_packed", - Ok(Encoding::DELTA_BINARY_PACKED) => "delta_binary_packed", - Ok(Encoding::DELTA_LENGTH_BYTE_ARRAY) => "delta_length_byte_array", - Ok(Encoding::DELTA_BYTE_ARRAY) => "delta_byte_array", - Ok(Encoding::RLE_DICTIONARY) => "rle_dictionary", - Ok(Encoding::BYTE_STREAM_SPLIT) => "byte_stream_split", - Err(_) => "unknown", + 4 => "bit_packed", + 5 => "delta_binary_packed", + 6 => "delta_length_byte_array", + 7 => "delta_byte_array", + 8 => "rle_dictionary", + 9 => "byte_stream_split", + _ => "unknown", } } diff --git a/parquet/src/bloom_filter/mod.rs b/parquet/src/bloom_filter/mod.rs index e1cc5fd68cd7..290a887b2960 100644 --- a/parquet/src/bloom_filter/mod.rs +++ b/parquet/src/bloom_filter/mod.rs @@ -74,13 +74,16 @@ use crate::basic::{BloomFilterAlgorithm, BloomFilterCompression, BloomFilterHash}; use crate::data_type::AsBytes; -use crate::errors::ParquetError; +use crate::errors::{ParquetError, Result}; use crate::file::metadata::ColumnChunkMetaData; use crate::file::reader::ChunkReader; -use crate::thrift::{TCompactSliceInputProtocol, TSerializable}; +use crate::parquet_thrift::{ + ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, ThriftCompactOutputProtocol, + ThriftSliceInputProtocol, WriteThrift, WriteThriftField, +}; +use crate::thrift_struct; use bytes::Bytes; use std::io::Write; -use thrift::protocol::{TCompactOutputProtocol, TOutputProtocol}; use twox_hash::XxHash64; /// Salt as defined in the [spec](https://github.com/apache/parquet-format/blob/master/BloomFilter.md#technical-approach). @@ -95,42 +98,21 @@ const SALT: [u32; 8] = [ 0x5c6bfb31_u32, ]; +thrift_struct!( /// Bloom filter header is stored at beginning of Bloom filter data of each column /// and followed by its bitset. /// -#[derive(Clone, Debug, Eq, PartialEq)] pub struct BloomFilterHeader { - /// The size of bitset in bytes * - pub num_bytes: i32, - /// The algorithm for setting bits. * - pub algorithm: BloomFilterAlgorithm, - /// The hash function used for Bloom filter. * - pub hash: BloomFilterHash, - /// The compression used in the Bloom filter * - pub compression: BloomFilterCompression, -} - -impl From for BloomFilterHeader { - fn from(value: crate::format::BloomFilterHeader) -> Self { - Self { - num_bytes: value.num_bytes, - algorithm: value.algorithm.into(), - hash: value.hash.into(), - compression: value.compression.into(), - } - } -} - -impl From for crate::format::BloomFilterHeader { - fn from(value: BloomFilterHeader) -> Self { - Self { - num_bytes: value.num_bytes, - algorithm: value.algorithm.into(), - hash: value.hash.into(), - compression: value.compression.into(), - } - } + /// The size of bitset in bytes + 1: required i32 num_bytes; + /// The algorithm for setting bits. + 2: required BloomFilterAlgorithm algorithm; + /// The hash function used for Bloom filter + 3: required BloomFilterHash hash; + /// The compression used in the Bloom filter + 4: required BloomFilterCompression compression; } +); /// Each block is 256 bits, broken up into eight contiguous "words", each consisting of 32 bits. /// Each word is thought of as an array of bits; each bit is either "set" or "not set". 
@@ -235,10 +217,10 @@ pub(crate) fn read_bloom_filter_header_and_length( buffer: Bytes, ) -> Result<(BloomFilterHeader, u64), ParquetError> { let total_length = buffer.len(); - let mut prot = TCompactSliceInputProtocol::new(buffer.as_ref()); - let header = crate::format::BloomFilterHeader::read_from_in_protocol(&mut prot) + let mut prot = ThriftSliceInputProtocol::new(buffer.as_ref()); + let header = BloomFilterHeader::read_thrift(&mut prot) .map_err(|e| ParquetError::General(format!("Could not read bloom filter header: {e}")))?; - Ok((header.into(), (total_length - prot.as_slice().len()) as u64)) + Ok((header, (total_length - prot.as_slice().len()) as u64)) } pub(crate) const BITSET_MIN_LENGTH: usize = 32; @@ -302,12 +284,10 @@ impl Sbbf { /// flush the writer in order to boost performance of bulk writing all blocks. Caller /// must remember to flush the writer. pub(crate) fn write(&self, mut writer: W) -> Result<(), ParquetError> { - let mut protocol = TCompactOutputProtocol::new(&mut writer); - let header: crate::format::BloomFilterHeader = self.header().into(); - header.write_to_out_protocol(&mut protocol).map_err(|e| { + let mut protocol = ThriftCompactOutputProtocol::new(&mut writer); + self.header().write_thrift(&mut protocol).map_err(|e| { ParquetError::General(format!("Could not write bloom filter header: {e}")) })?; - protocol.flush()?; self.write_bitset(&mut writer)?; Ok(()) } diff --git a/parquet/src/column/page.rs b/parquet/src/column/page.rs index 23bf4548fbb4..09125eaabf02 100644 --- a/parquet/src/column/page.rs +++ b/parquet/src/column/page.rs @@ -218,7 +218,7 @@ impl CompressedPage { let page_type = self.page_type(); let mut page_header = PageHeader { - type_: page_type, + r#type: page_type, uncompressed_page_size: uncompressed_size as i32, compressed_page_size: compressed_size as i32, // TODO: Add support for crc checksum @@ -351,7 +351,7 @@ impl TryFrom<&crate::file::metadata::thrift_gen::PageHeader> for PageMetadata { fn try_from( value: &crate::file::metadata::thrift_gen::PageHeader, ) -> std::result::Result { - match value.type_ { + match value.r#type { PageType::DATA_PAGE => { let header = value.data_page_header.as_ref().unwrap(); Ok(PageMetadata { diff --git a/parquet/src/column/page_encryption.rs b/parquet/src/column/page_encryption.rs index 7ee367a289c8..2486c2c289c4 100644 --- a/parquet/src/column/page_encryption.rs +++ b/parquet/src/column/page_encryption.rs @@ -95,14 +95,14 @@ impl PageEncryptor { page_header: &PageHeader, sink: &mut W, ) -> Result<()> { - let module_type = match page_header.type_ { + let module_type = match page_header.r#type { PageType::DATA_PAGE => ModuleType::DataPageHeader, PageType::DATA_PAGE_V2 => ModuleType::DataPageHeader, PageType::DICTIONARY_PAGE => ModuleType::DictionaryPageHeader, _ => { return Err(general_err!( "Unsupported page type for page header encryption: {:?}", - page_header.type_ + page_header.r#type )) } }; diff --git a/parquet/src/column/writer/mod.rs b/parquet/src/column/writer/mod.rs index ee400f200e4d..2ef2d236e7f7 100644 --- a/parquet/src/column/writer/mod.rs +++ b/parquet/src/column/writer/mod.rs @@ -40,9 +40,8 @@ use crate::encryption::encrypt::get_column_crypto_metadata; use crate::errors::{ParquetError, Result}; use crate::file::metadata::{ ColumnChunkMetaData, ColumnChunkMetaDataBuilder, ColumnIndexBuilder, LevelHistogram, - OffsetIndexBuilder, + OffsetIndexBuilder, PageEncodingStats, }; -use crate::file::page_encoding_stats::PageEncodingStats; use crate::file::properties::{ EnabledStatistics, WriterProperties, 
WriterPropertiesPtr, WriterVersion, }; diff --git a/parquet/src/file/column_crypto_metadata.rs b/parquet/src/file/column_crypto_metadata.rs index 429e7946dd67..7628fb615a9d 100644 --- a/parquet/src/file/column_crypto_metadata.rs +++ b/parquet/src/file/column_crypto_metadata.rs @@ -21,11 +21,6 @@ use std::io::Write; use crate::errors::{ParquetError, Result}; use crate::file::metadata::HeapSize; -use crate::format::{ - ColumnCryptoMetaData as TColumnCryptoMetaData, - EncryptionWithColumnKey as TEncryptionWithColumnKey, - EncryptionWithFooterKey as TEncryptionWithFooterKey, -}; use crate::parquet_thrift::{ read_thrift_vec, ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, ThriftCompactOutputProtocol, WriteThrift, WriteThriftField, @@ -69,61 +64,11 @@ impl HeapSize for ColumnCryptoMetaData { } } -/// Converts Thrift definition into `ColumnCryptoMetadata`. -pub fn try_from_thrift( - thrift_column_crypto_metadata: &TColumnCryptoMetaData, -) -> Result { - let crypto_metadata = match thrift_column_crypto_metadata { - TColumnCryptoMetaData::ENCRYPTIONWITHFOOTERKEY(_) => { - ColumnCryptoMetaData::ENCRYPTION_WITH_FOOTER_KEY - } - TColumnCryptoMetaData::ENCRYPTIONWITHCOLUMNKEY(encryption_with_column_key) => { - ColumnCryptoMetaData::ENCRYPTION_WITH_COLUMN_KEY(EncryptionWithColumnKey { - path_in_schema: encryption_with_column_key.path_in_schema.clone(), - key_metadata: encryption_with_column_key.key_metadata.clone(), - }) - } - }; - Ok(crypto_metadata) -} - -/// Converts `ColumnCryptoMetadata` into Thrift definition. -pub fn to_thrift(column_crypto_metadata: &ColumnCryptoMetaData) -> TColumnCryptoMetaData { - match column_crypto_metadata { - ColumnCryptoMetaData::ENCRYPTION_WITH_FOOTER_KEY => { - TColumnCryptoMetaData::ENCRYPTIONWITHFOOTERKEY(TEncryptionWithFooterKey {}) - } - ColumnCryptoMetaData::ENCRYPTION_WITH_COLUMN_KEY(encryption_with_column_key) => { - TColumnCryptoMetaData::ENCRYPTIONWITHCOLUMNKEY(TEncryptionWithColumnKey { - path_in_schema: encryption_with_column_key.path_in_schema.clone(), - key_metadata: encryption_with_column_key.key_metadata.clone(), - }) - } - } -} - #[cfg(test)] mod tests { use super::*; use crate::parquet_thrift::tests::test_roundtrip; - #[test] - fn test_encryption_with_footer_key_from_thrift() { - let metadata = ColumnCryptoMetaData::ENCRYPTION_WITH_FOOTER_KEY; - - assert_eq!(try_from_thrift(&to_thrift(&metadata)).unwrap(), metadata); - } - - #[test] - fn test_encryption_with_column_key_from_thrift() { - let metadata = ColumnCryptoMetaData::ENCRYPTION_WITH_COLUMN_KEY(EncryptionWithColumnKey { - path_in_schema: vec!["abc".to_owned(), "def".to_owned()], - key_metadata: Some(vec![0, 1, 2, 3, 4, 5]), - }); - - assert_eq!(try_from_thrift(&to_thrift(&metadata)).unwrap(), metadata); - } - #[test] fn test_column_crypto_roundtrip() { test_roundtrip(ColumnCryptoMetaData::ENCRYPTION_WITH_FOOTER_KEY); diff --git a/parquet/src/file/metadata/memory.rs b/parquet/src/file/metadata/memory.rs index bfe6b0255c5c..208e62537bcb 100644 --- a/parquet/src/file/metadata/memory.rs +++ b/parquet/src/file/metadata/memory.rs @@ -21,9 +21,8 @@ use crate::basic::{BoundaryOrder, ColumnOrder, Compression, Encoding, PageType}; use crate::data_type::private::ParquetValueType; use crate::file::metadata::{ - ColumnChunkMetaData, FileMetaData, KeyValue, RowGroupMetaData, SortingColumn, + ColumnChunkMetaData, FileMetaData, KeyValue, PageEncodingStats, RowGroupMetaData, SortingColumn, }; -use crate::file::page_encoding_stats::PageEncodingStats; use crate::file::page_index::column_index::{ 
ByteArrayColumnIndex, ColumnIndex, ColumnIndexMetaData, PrimitiveColumnIndex, }; diff --git a/parquet/src/file/metadata/mod.rs b/parquet/src/file/metadata/mod.rs index 2257b8fdf25e..b7e99e67b632 100644 --- a/parquet/src/file/metadata/mod.rs +++ b/parquet/src/file/metadata/mod.rs @@ -17,9 +17,7 @@ //! Parquet metadata API //! -//! Most users should use these structures to interact with Parquet metadata. -//! The [crate::format] module contains lower level structures generated from the -//! Parquet thrift definition. +//! Users should use these structures to interact with Parquet metadata. //! //! * [`ParquetMetaData`]: Top level metadata container, read from the Parquet //! file footer. @@ -66,7 +64,6 @@ //! with a more idiomatic API. Note that, confusingly, some but not all //! of these structures have the same name as the [`format`] structures. //! -//! [`format`]: crate::format //! [`file::metadata`]: crate::file::metadata //! [parquet.thrift]: https://github.com/apache/parquet-format/blob/master/src/main/thrift/parquet.thrift //! @@ -98,18 +95,15 @@ pub(crate) mod reader; pub(crate) mod thrift_gen; mod writer; +use crate::basic::PageType; #[cfg(feature = "encryption")] use crate::encryption::decrypt::FileDecryptor; #[cfg(feature = "encryption")] -use crate::file::column_crypto_metadata::{self, ColumnCryptoMetaData}; +use crate::file::column_crypto_metadata::ColumnCryptoMetaData; pub(crate) use crate::file::metadata::memory::HeapSize; use crate::file::page_index::column_index::{ByteArrayColumnIndex, PrimitiveColumnIndex}; -use crate::file::statistics::{self, Statistics}; -use crate::file::{ - page_encoding_stats::{self, PageEncodingStats}, - page_index::{column_index::ColumnIndexMetaData, offset_index::PageLocation}, -}; -use crate::format::ColumnCryptoMetaData as TColumnCryptoMetaData; +use crate::file::page_index::{column_index::ColumnIndexMetaData, offset_index::PageLocation}; +use crate::file::statistics::Statistics; use crate::geospatial::statistics as geo_statistics; use crate::schema::types::{ ColumnDescPtr, ColumnDescriptor, ColumnPath, SchemaDescPtr, SchemaDescriptor, @@ -168,7 +162,7 @@ pub type ParquetColumnIndex = Vec>; /// `column_number`of row group `row_group_number`. /// /// [PageIndex documentation]: https://github.com/apache/parquet-format/blob/master/PageIndex.md -/// [`OffsetIndex`]: crate::format::OffsetIndex +/// [`OffsetIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md pub type ParquetOffsetIndex = Vec>; /// Parsed metadata for a single Parquet file @@ -430,7 +424,6 @@ impl From for ParquetMetaDataBuilder { } } -// TODO: should this move to thrift_gen? thrift_struct!( /// A key-value pair for [`FileMetaData`]. pub struct KeyValue { @@ -452,6 +445,15 @@ impl KeyValue { } } +thrift_struct!( +/// PageEncodingStats for a column chunk and data page. +pub struct PageEncodingStats { + 1: required PageType page_type; + 2: required Encoding encoding; + 3: required i32 count; +} +); + /// Reference counted pointer for [`FileMetaData`]. pub type FileMetaDataPtr = Arc; @@ -553,7 +555,6 @@ impl FileMetaData { } } -// TODO: should this move to thrift_gen? 
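The `PageEncodingStats` struct generated above keeps the same public fields it had in the removed `parquet/src/file/page_encoding_stats.rs`, so construction sites are unchanged. A hypothetical usage sketch, assuming the struct remains publicly reachable via `parquet::file::metadata` as this diff suggests:

```rust
use parquet::basic::{Encoding, PageType};
use parquet::file::metadata::PageEncodingStats;

fn main() {
    // one PLAIN-encoded data page observed in a column chunk
    let stats = PageEncodingStats {
        page_type: PageType::DATA_PAGE,
        encoding: Encoding::PLAIN,
        count: 1,
    };
    assert_eq!(stats.count, 1);
}
```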
thrift_struct!( /// Sort order within a RowGroup of a leaf column pub struct SortingColumn { @@ -569,26 +570,6 @@ pub struct SortingColumn { } ); -impl From<&crate::format::SortingColumn> for SortingColumn { - fn from(value: &crate::format::SortingColumn) -> Self { - Self { - column_idx: value.column_idx, - descending: value.descending, - nulls_first: value.nulls_first, - } - } -} - -impl From<&SortingColumn> for crate::format::SortingColumn { - fn from(value: &SortingColumn) -> Self { - Self { - column_idx: value.column_idx, - descending: value.descending, - nulls_first: value.nulls_first, - } - } -} - /// Reference counted pointer for [`RowGroupMetaData`]. pub type RowGroupMetaDataPtr = Arc; @@ -680,60 +661,6 @@ impl RowGroupMetaData { self.file_offset } - /// Method to convert from Thrift. - pub fn from_thrift( - schema_descr: SchemaDescPtr, - mut rg: crate::format::RowGroup, - ) -> Result { - if schema_descr.num_columns() != rg.columns.len() { - return Err(general_err!( - "Column count mismatch. Schema has {} columns while Row Group has {}", - schema_descr.num_columns(), - rg.columns.len() - )); - } - let total_byte_size = rg.total_byte_size; - let num_rows = rg.num_rows; - let mut columns = vec![]; - - for (c, d) in rg.columns.drain(0..).zip(schema_descr.columns()) { - columns.push(ColumnChunkMetaData::from_thrift(d.clone(), c)?); - } - - let sorting_columns = rg.sorting_columns.map(|scs| { - scs.iter() - .map(|sc| sc.into()) - .collect::>() - }); - Ok(RowGroupMetaData { - columns, - num_rows, - sorting_columns, - total_byte_size, - schema_descr, - file_offset: rg.file_offset, - ordinal: rg.ordinal, - }) - } - - /// Method to convert to Thrift. - pub fn to_thrift(&self) -> crate::format::RowGroup { - let sorting_columns = self.sorting_columns().map(|scs| { - scs.iter() - .map(|sc| sc.into()) - .collect::>() - }); - crate::format::RowGroup { - columns: self.columns().iter().map(|v| v.to_thrift()).collect(), - total_byte_size: self.total_byte_size, - num_rows: self.num_rows, - sorting_columns, - file_offset: self.file_offset(), - total_compressed_size: Some(self.compressed_size()), - ordinal: self.ordinal, - } - } - /// Converts this [`RowGroupMetaData`] into a [`RowGroupMetaDataBuilder`] pub fn into_builder(self) -> RowGroupMetaDataBuilder { RowGroupMetaDataBuilder(self) @@ -1153,188 +1080,10 @@ impl ColumnChunkMetaData { self.column_crypto_metadata.as_ref() } - /// Method to convert from Thrift. - pub fn from_thrift( - column_descr: ColumnDescPtr, - cc: crate::format::ColumnChunk, - ) -> Result { - if cc.meta_data.is_none() { - return Err(general_err!("Expected to have column metadata")); - } - let mut col_metadata: crate::format::ColumnMetaData = cc.meta_data.unwrap(); - let column_type = Type::try_from(col_metadata.type_)?; - let encodings = col_metadata - .encodings - .drain(0..) 
- .map(Encoding::try_from) - .collect::>()?; - let compression = Compression::try_from(col_metadata.codec)?; - let file_path = cc.file_path; - let file_offset = cc.file_offset; - let num_values = col_metadata.num_values; - let total_compressed_size = col_metadata.total_compressed_size; - let total_uncompressed_size = col_metadata.total_uncompressed_size; - let data_page_offset = col_metadata.data_page_offset; - let index_page_offset = col_metadata.index_page_offset; - let dictionary_page_offset = col_metadata.dictionary_page_offset; - let statistics = statistics::from_thrift(column_type, col_metadata.statistics)?; - //let geo_statistics = - // geo_statistics::from_thrift(col_metadata.geospatial_statistics).map(Box::new); - let encoding_stats = col_metadata - .encoding_stats - .as_ref() - .map(|vec| { - vec.iter() - .map(page_encoding_stats::try_from_thrift) - .collect::>() - }) - .transpose()?; - let bloom_filter_offset = col_metadata.bloom_filter_offset; - let bloom_filter_length = col_metadata.bloom_filter_length; - let offset_index_offset = cc.offset_index_offset; - let offset_index_length = cc.offset_index_length; - let column_index_offset = cc.column_index_offset; - let column_index_length = cc.column_index_length; - let ( - unencoded_byte_array_data_bytes, - repetition_level_histogram, - definition_level_histogram, - ) = if let Some(size_stats) = col_metadata.size_statistics { - ( - size_stats.unencoded_byte_array_data_bytes, - size_stats.repetition_level_histogram, - size_stats.definition_level_histogram, - ) - } else { - (None, None, None) - }; - - let repetition_level_histogram = repetition_level_histogram.map(LevelHistogram::from); - let definition_level_histogram = definition_level_histogram.map(LevelHistogram::from); - - #[cfg(feature = "encryption")] - let column_crypto_metadata = if let Some(crypto_metadata) = cc.crypto_metadata { - Some(column_crypto_metadata::try_from_thrift(&crypto_metadata)?) - } else { - None - }; - - let result = ColumnChunkMetaData { - column_descr, - encodings, - file_path, - file_offset, - num_values, - compression, - total_compressed_size, - total_uncompressed_size, - data_page_offset, - index_page_offset, - dictionary_page_offset, - statistics, - encoding_stats, - bloom_filter_offset, - bloom_filter_length, - offset_index_offset, - offset_index_length, - column_index_offset, - column_index_length, - unencoded_byte_array_data_bytes, - repetition_level_histogram, - definition_level_histogram, - geo_statistics: None, - #[cfg(feature = "encryption")] - column_crypto_metadata, - #[cfg(feature = "encryption")] - encrypted_column_metadata: None, - }; - Ok(result) - } - - /// Method to convert to Thrift. 
- pub fn to_thrift(&self) -> crate::format::ColumnChunk { - let column_metadata = self.to_column_metadata_thrift(); - - crate::format::ColumnChunk { - file_path: self.file_path().map(|s| s.to_owned()), - file_offset: self.file_offset, - meta_data: Some(column_metadata), - offset_index_offset: self.offset_index_offset, - offset_index_length: self.offset_index_length, - column_index_offset: self.column_index_offset, - column_index_length: self.column_index_length, - crypto_metadata: self.column_crypto_metadata_thrift(), - encrypted_column_metadata: None, - } - } - - /// Method to convert to Thrift `ColumnMetaData` - pub fn to_column_metadata_thrift(&self) -> crate::format::ColumnMetaData { - let size_statistics = if self.unencoded_byte_array_data_bytes.is_some() - || self.repetition_level_histogram.is_some() - || self.definition_level_histogram.is_some() - { - let repetition_level_histogram = self - .repetition_level_histogram - .as_ref() - .map(|hist| hist.clone().into_inner()); - - let definition_level_histogram = self - .definition_level_histogram - .as_ref() - .map(|hist| hist.clone().into_inner()); - - Some(crate::format::SizeStatistics { - unencoded_byte_array_data_bytes: self.unencoded_byte_array_data_bytes, - repetition_level_histogram, - definition_level_histogram, - }) - } else { - None - }; - - crate::format::ColumnMetaData { - type_: self.column_type().into(), - encodings: self.encodings().iter().map(|&v| v.into()).collect(), - path_in_schema: self.column_path().as_ref().to_vec(), - codec: self.compression.into(), - num_values: self.num_values, - total_uncompressed_size: self.total_uncompressed_size, - total_compressed_size: self.total_compressed_size, - key_value_metadata: None, - data_page_offset: self.data_page_offset, - index_page_offset: self.index_page_offset, - dictionary_page_offset: self.dictionary_page_offset, - statistics: statistics::to_thrift(self.statistics.as_ref()), - encoding_stats: self - .encoding_stats - .as_ref() - .map(|vec| vec.iter().map(page_encoding_stats::to_thrift).collect()), - bloom_filter_offset: self.bloom_filter_offset, - bloom_filter_length: self.bloom_filter_length, - size_statistics, - geospatial_statistics: geo_statistics::to_thrift( - self.geo_statistics.as_ref().map(|boxed| boxed.as_ref()), - ), - } - } - /// Converts this [`ColumnChunkMetaData`] into a [`ColumnChunkMetaDataBuilder`] pub fn into_builder(self) -> ColumnChunkMetaDataBuilder { ColumnChunkMetaDataBuilder::from(self) } - - #[cfg(feature = "encryption")] - fn column_crypto_metadata_thrift(&self) -> Option { - self.column_crypto_metadata - .as_ref() - .map(column_crypto_metadata::to_thrift) - } - - #[cfg(not(feature = "encryption"))] - fn column_crypto_metadata_thrift(&self) -> Option { - None - } } /// Builder for [`ColumnChunkMetaData`] @@ -1547,7 +1296,7 @@ impl ColumnChunkMetaDataBuilder { /// Builder for Parquet [`ColumnIndex`], part of the Parquet [PageIndex] /// /// [PageIndex]: https://github.com/apache/parquet-format/blob/master/PageIndex.md -/// [`ColumnIndex`]: crate::format::ColumnIndex +/// [`ColumnIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md pub struct ColumnIndexBuilder { column_type: Type, null_pages: Vec, @@ -1602,7 +1351,7 @@ impl ColumnIndexBuilder { /// Append the given page-level histograms to the [`ColumnIndex`] histograms. /// Does nothing if the `ColumnIndexBuilder` is not in the `valid` state. 
/// - /// [`ColumnIndex`]: crate::format::ColumnIndex + /// [`ColumnIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md pub fn append_histograms( &mut self, repetition_level_histogram: &Option, @@ -1638,21 +1387,6 @@ impl ColumnIndexBuilder { self.valid } - /// Build and get the thrift metadata of column index - /// - /// Note: callers should check [`Self::valid`] before calling this method - pub fn build_to_thrift(self) -> crate::format::ColumnIndex { - crate::format::ColumnIndex::new( - self.null_pages, - self.min_values, - self.max_values, - self.boundary_order.into(), - self.null_counts, - self.repetition_level_histograms, - self.definition_level_histograms, - ) - } - /// Build and get the column index /// /// Note: callers should check [`Self::valid`] before calling this method @@ -1787,20 +1521,6 @@ impl OffsetIndexBuilder { } } - /// Build and get the thrift metadata of offset index - pub fn build_to_thrift(self) -> crate::format::OffsetIndex { - let locations = self - .offset_array - .iter() - .zip(self.compressed_page_size_array.iter()) - .zip(self.first_row_index_array.iter()) - .map(|((offset, size), row_index)| { - crate::format::PageLocation::new(*offset, *size, *row_index) - }) - .collect::>(); - crate::format::OffsetIndex::new(locations, self.unencoded_byte_array_data_bytes_array) - } - /// Build and get the thrift metadata of offset index pub fn build(self) -> OffsetIndexMetaData { let locations = self @@ -1825,7 +1545,7 @@ impl OffsetIndexBuilder { mod tests { use super::*; use crate::basic::{PageType, SortOrder}; - use crate::file::page_index::column_index::{ColumnIndex, PrimitiveColumnIndex}; + use crate::file::metadata::thrift_gen::tests::{read_column_chunk, read_row_group}; #[test] fn test_row_group_metadata_thrift_conversion() { @@ -1844,12 +1564,13 @@ mod tests { .build() .unwrap(); - let row_group_exp = row_group_meta.to_thrift(); - let row_group_res = RowGroupMetaData::from_thrift(schema_descr, row_group_exp.clone()) - .unwrap() - .to_thrift(); + let mut buf = Vec::new(); + let mut writer = ThriftCompactOutputProtocol::new(&mut buf); + row_group_meta.write_thrift(&mut writer).unwrap(); - assert_eq!(row_group_res, row_group_exp); + let row_group_res = read_row_group(&mut buf, schema_descr).unwrap(); + + assert_eq!(row_group_res, row_group_meta); } #[test] @@ -1925,11 +1646,13 @@ mod tests { .set_ordinal(1) .build() .unwrap(); + let mut buf = Vec::new(); + let mut writer = ThriftCompactOutputProtocol::new(&mut buf); + row_group_meta_2cols.write_thrift(&mut writer).unwrap(); - let err = - RowGroupMetaData::from_thrift(schema_descr_3cols, row_group_meta_2cols.to_thrift()) - .unwrap_err() - .to_string(); + let err = read_row_group(&mut buf, schema_descr_3cols) + .unwrap_err() + .to_string(); assert_eq!( err, "Parquet error: Column count mismatch. 
Schema has 3 columns while Row Group has 2" @@ -1973,8 +1696,10 @@ mod tests { .build() .unwrap(); - let col_chunk_res = - ColumnChunkMetaData::from_thrift(column_descr, col_metadata.to_thrift()).unwrap(); + let mut buf = Vec::new(); + let mut writer = ThriftCompactOutputProtocol::new(&mut buf); + col_metadata.write_thrift(&mut writer).unwrap(); + let col_chunk_res = read_column_chunk(&mut buf, column_descr).unwrap(); assert_eq!(col_chunk_res, col_metadata); } @@ -1987,12 +1712,12 @@ mod tests { .build() .unwrap(); - let col_chunk_exp = col_metadata.to_thrift(); - let col_chunk_res = ColumnChunkMetaData::from_thrift(column_descr, col_chunk_exp.clone()) - .unwrap() - .to_thrift(); + let mut buf = Vec::new(); + let mut writer = ThriftCompactOutputProtocol::new(&mut buf); + col_metadata.write_thrift(&mut writer).unwrap(); + let col_chunk_res = read_column_chunk(&mut buf, column_descr).unwrap(); - assert_eq!(col_chunk_res, col_chunk_exp); + assert_eq!(col_chunk_res, col_metadata); } #[test] @@ -2099,17 +1824,10 @@ mod tests { let mut column_index = ColumnIndexBuilder::new(Type::BOOLEAN); column_index.append(false, vec![1u8], vec![2u8, 3u8], 4); - let column_index = column_index.build_to_thrift(); - let native_index = PrimitiveColumnIndex:: { - column_index: ColumnIndex { - null_pages: column_index.null_pages, - boundary_order: column_index.boundary_order.try_into().unwrap(), - null_counts: column_index.null_counts, - repetition_level_histograms: column_index.repetition_level_histograms, - definition_level_histograms: column_index.definition_level_histograms, - }, - min_values: vec![], - max_values: vec![], + let column_index = column_index.build().unwrap(); + let native_index = match column_index { + ColumnIndexMetaData::BOOLEAN(index) => index, + _ => panic!("wrong type of column index"), }; // Now, add in OffsetIndex @@ -2120,20 +1838,18 @@ mod tests { offset_index.append_row_count(1); offset_index.append_offset_and_size(2, 3); offset_index.append_unencoded_byte_array_data_bytes(Some(10)); - let offset_index = offset_index.build_to_thrift(); + let offset_index = offset_index.build(); let parquet_meta = ParquetMetaDataBuilder::new(file_metadata) .set_row_groups(row_group_meta) .set_column_index(Some(vec![vec![ColumnIndexMetaData::BOOLEAN(native_index)]])) - .set_offset_index(Some(vec![vec![ - OffsetIndexMetaData::try_new(offset_index).unwrap() - ]])) + .set_offset_index(Some(vec![vec![offset_index]])) .build(); #[cfg(not(feature = "encryption"))] - let bigger_expected_size = 2736; + let bigger_expected_size = 2738; #[cfg(feature = "encryption")] - let bigger_expected_size = 3168; + let bigger_expected_size = 3170; // more set fields means more memory usage assert!(bigger_expected_size > base_expected_size); diff --git a/parquet/src/file/metadata/parser.rs b/parquet/src/file/metadata/parser.rs index ccfdcaffc69f..cbe005d8f96a 100644 --- a/parquet/src/file/metadata/parser.rs +++ b/parquet/src/file/metadata/parser.rs @@ -29,16 +29,13 @@ use crate::file::page_index::offset_index::OffsetIndexMetaData; use crate::parquet_thrift::{ReadThrift, ThriftSliceInputProtocol}; use bytes::Bytes; -#[cfg(feature = "encryption")] -use crate::encryption::decrypt::FileDecryptionProperties; - /// Helper struct for metadata parsing /// /// This structure parses thrift-encoded bytes into the correct Rust structs, /// such as [`ParquetMetaData`], handling decryption if necessary. // // Note this structure is used to minimize the number of -// places need to add `#[cfg(feature = "encryption")]` checks. 
+// places to add `#[cfg(feature = "encryption")]` checks. pub(crate) use inner::MetadataParser; #[cfg(feature = "encryption")] @@ -74,13 +71,69 @@ mod inner { buf: &[u8], encrypted_footer: bool, ) -> Result { - decode_metadata_with_encryption( - buf, - encrypted_footer, + crate::file::metadata::thrift_gen::parquet_metadata_with_encryption( self.file_decryption_properties.as_deref(), + encrypted_footer, + buf, ) } } + + pub(super) fn parse_single_column_index( + bytes: &[u8], + metadata: &ParquetMetaData, + column: &ColumnChunkMetaData, + row_group_index: usize, + col_index: usize, + ) -> crate::errors::Result { + use crate::encryption::decrypt::CryptoContext; + match &column.column_crypto_metadata { + Some(crypto_metadata) => { + let file_decryptor = metadata.file_decryptor.as_ref().ok_or_else(|| { + general_err!("Cannot decrypt column index, no file decryptor set") + })?; + let crypto_context = CryptoContext::for_column( + file_decryptor, + crypto_metadata, + row_group_index, + col_index, + )?; + let column_decryptor = crypto_context.metadata_decryptor(); + let aad = crypto_context.create_column_index_aad()?; + let plaintext = column_decryptor.decrypt(bytes, &aad)?; + decode_column_index(&plaintext, column.column_type()) + } + None => decode_column_index(bytes, column.column_type()), + } + } + + pub(super) fn parse_single_offset_index( + bytes: &[u8], + metadata: &ParquetMetaData, + column: &ColumnChunkMetaData, + row_group_index: usize, + col_index: usize, + ) -> crate::errors::Result { + use crate::encryption::decrypt::CryptoContext; + match &column.column_crypto_metadata { + Some(crypto_metadata) => { + let file_decryptor = metadata.file_decryptor.as_ref().ok_or_else(|| { + general_err!("Cannot decrypt offset index, no file decryptor set") + })?; + let crypto_context = CryptoContext::for_column( + file_decryptor, + crypto_metadata, + row_group_index, + col_index, + )?; + let column_decryptor = crypto_context.metadata_decryptor(); + let aad = crypto_context.create_offset_index_aad()?; + let plaintext = column_decryptor.decrypt(bytes, &aad)?; + decode_offset_index(&plaintext) + } + None => decode_offset_index(bytes), + } + } } #[cfg(not(feature = "encryption"))] @@ -112,6 +165,26 @@ mod inner { } } } + + pub(super) fn parse_single_column_index( + bytes: &[u8], + _metadata: &ParquetMetaData, + column: &ColumnChunkMetaData, + _row_group_index: usize, + _col_index: usize, + ) -> crate::errors::Result { + decode_column_index(bytes, column.column_type()) + } + + pub(super) fn parse_single_offset_index( + bytes: &[u8], + _metadata: &ParquetMetaData, + _column: &ColumnChunkMetaData, + _row_group_index: usize, + _col_index: usize, + ) -> crate::errors::Result { + decode_offset_index(bytes) + } } /// Decodes [`ParquetMetaData`] from the provided bytes. 
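The point of routing both variants through `inner` is that callers such as `parse_column_index` and `parse_offset_index` compile identically with or without the `encryption` feature. A stripped-down sketch of the pattern; module and function names here are invented for illustration and are not this crate's API:

```rust
// Both cfg variants expose the same signature, so the caller below
// needs no feature gates of its own.
#[cfg(feature = "encryption")]
mod inner {
    pub fn parse_index(bytes: &[u8], encrypted: bool) -> Vec<u8> {
        if encrypted {
            decrypt(bytes)
        } else {
            bytes.to_vec()
        }
    }

    // stand-in for the real AES-GCM index decryption
    fn decrypt(bytes: &[u8]) -> Vec<u8> {
        bytes.to_vec()
    }
}

#[cfg(not(feature = "encryption"))]
mod inner {
    pub fn parse_index(bytes: &[u8], _encrypted: bool) -> Vec<u8> {
        bytes.to_vec()
    }
}

fn parse_all(bytes: &[u8]) -> Vec<u8> {
    // compiles unchanged whether or not "encryption" is enabled
    inner::parse_index(bytes, false)
}

fn main() {
    assert_eq!(parse_all(&[1, 2, 3]), vec![1, 2, 3]);
}
```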
@@ -155,7 +228,7 @@ pub(crate) fn parse_column_index( Some(r) => { let r_start = usize::try_from(r.start - start_offset)?; let r_end = usize::try_from(r.end - start_offset)?; - parse_single_column_index( + inner::parse_single_column_index( &bytes[r_start..r_end], metadata, c, @@ -173,46 +246,6 @@ pub(crate) fn parse_column_index( Ok(()) } -#[cfg(feature = "encryption")] -fn parse_single_column_index( - bytes: &[u8], - metadata: &ParquetMetaData, - column: &ColumnChunkMetaData, - row_group_index: usize, - col_index: usize, -) -> crate::errors::Result { - use crate::encryption::decrypt::CryptoContext; - match &column.column_crypto_metadata { - Some(crypto_metadata) => { - let file_decryptor = metadata.file_decryptor.as_ref().ok_or_else(|| { - general_err!("Cannot decrypt column index, no file decryptor set") - })?; - let crypto_context = CryptoContext::for_column( - file_decryptor, - crypto_metadata, - row_group_index, - col_index, - )?; - let column_decryptor = crypto_context.metadata_decryptor(); - let aad = crypto_context.create_column_index_aad()?; - let plaintext = column_decryptor.decrypt(bytes, &aad)?; - decode_column_index(&plaintext, column.column_type()) - } - None => decode_column_index(bytes, column.column_type()), - } -} - -#[cfg(not(feature = "encryption"))] -fn parse_single_column_index( - bytes: &[u8], - _metadata: &ParquetMetaData, - column: &ColumnChunkMetaData, - _row_group_index: usize, - _col_index: usize, -) -> crate::errors::Result { - decode_column_index(bytes, column.column_type()) -} - pub(crate) fn parse_offset_index( metadata: &mut ParquetMetaData, offset_index_policy: PageIndexPolicy, @@ -231,7 +264,13 @@ pub(crate) fn parse_offset_index( Some(r) => { let r_start = usize::try_from(r.start - start_offset)?; let r_end = usize::try_from(r.end - start_offset)?; - parse_single_offset_index(&bytes[r_start..r_end], metadata, c, rg_idx, col_idx) + inner::parse_single_offset_index( + &bytes[r_start..r_end], + metadata, + c, + rg_idx, + col_idx, + ) } None => Err(general_err!("missing offset index")), }; @@ -255,65 +294,3 @@ pub(crate) fn parse_offset_index( metadata.set_offset_index(Some(all_indexes)); Ok(()) } - -#[cfg(feature = "encryption")] -fn parse_single_offset_index( - bytes: &[u8], - metadata: &ParquetMetaData, - column: &ColumnChunkMetaData, - row_group_index: usize, - col_index: usize, -) -> crate::errors::Result { - use crate::encryption::decrypt::CryptoContext; - match &column.column_crypto_metadata { - Some(crypto_metadata) => { - let file_decryptor = metadata.file_decryptor.as_ref().ok_or_else(|| { - general_err!("Cannot decrypt offset index, no file decryptor set") - })?; - let crypto_context = CryptoContext::for_column( - file_decryptor, - crypto_metadata, - row_group_index, - col_index, - )?; - let column_decryptor = crypto_context.metadata_decryptor(); - let aad = crypto_context.create_offset_index_aad()?; - let plaintext = column_decryptor.decrypt(bytes, &aad)?; - decode_offset_index(&plaintext) - } - None => decode_offset_index(bytes), - } -} - -#[cfg(not(feature = "encryption"))] -fn parse_single_offset_index( - bytes: &[u8], - _metadata: &ParquetMetaData, - _column: &ColumnChunkMetaData, - _row_group_index: usize, - _col_index: usize, -) -> crate::errors::Result { - decode_offset_index(bytes) -} - -/// Decodes [`ParquetMetaData`] from the provided bytes, handling metadata that may be encrypted. -/// -/// Typically this is used to decode the metadata from the end of a parquet -/// file. 
The format of `buf` is the Thrift compact binary protocol, as specified -/// by the [Parquet Spec]. Buffer can be encrypted with AES GCM or AES CTR -/// ciphers as specfied in the [Parquet Encryption Spec]. -/// -/// [Parquet Spec]: https://github.com/apache/parquet-format#metadata -/// [Parquet Encryption Spec]: https://parquet.apache.org/docs/file-format/data-pages/encryption/ -#[cfg(feature = "encryption")] -fn decode_metadata_with_encryption( - buf: &[u8], - encrypted_footer: bool, - file_decryption_properties: Option<&FileDecryptionProperties>, -) -> crate::errors::Result { - super::thrift_gen::parquet_metadata_with_encryption( - file_decryption_properties, - encrypted_footer, - buf, - ) -} diff --git a/parquet/src/file/metadata/thrift_gen.rs b/parquet/src/file/metadata/thrift_gen.rs index 062a903c79f4..7a0b32bfe12e 100644 --- a/parquet/src/file/metadata/thrift_gen.rs +++ b/parquet/src/file/metadata/thrift_gen.rs @@ -28,10 +28,9 @@ use crate::{ errors::{ParquetError, Result}, file::{ metadata::{ - ColumnChunkMetaData, KeyValue, LevelHistogram, ParquetMetaData, RowGroupMetaData, - SortingColumn, + ColumnChunkMetaData, KeyValue, LevelHistogram, PageEncodingStats, ParquetMetaData, + RowGroupMetaData, SortingColumn, }, - page_encoding_stats::PageEncodingStats, statistics::ValueStatistics, }, parquet_thrift::{ @@ -55,16 +54,41 @@ use crate::{ // this needs to be visible to the schema conversion code thrift_struct!( pub(crate) struct SchemaElement<'a> { - /** Data type for this field. Not set if the current element is a non-leaf node */ - 1: optional Type type_; + /// Data type for this field. Not set if the current element is a non-leaf node + 1: optional Type r#type; + /// If type is FIXED_LEN_BYTE_ARRAY, this is the byte length of the values. + /// Otherwise, if specified, this is the maximum bit length to store any of the values. + /// (e.g. a low cardinality INT col could have this set to 3). Note that this is + /// in the schema, and therefore fixed for the entire file. 2: optional i32 type_length; + /// Repetition of the field. The root of the schema does not have a repetition_type. + /// All other nodes must have one. 3: optional Repetition repetition_type; + /// Name of the field in the schema 4: required string<'a> name; + /// Nested fields. Since thrift does not support nested fields, + /// the nesting is flattened to a single list by a depth-first traversal. + /// The children count is used to construct the nested relationship. + /// This field is not set when the element is a primitive type. 5: optional i32 num_children; + /// DEPRECATED: When the schema is the result of a conversion from another model. + /// Used to record the original type to help with cross conversion. + /// + /// This is superseded by logical_type. 6: optional ConvertedType converted_type; + /// DEPRECATED: Used when this column contains decimal data. + /// See the DECIMAL converted type for more details. + /// + /// This is superseded by using the DecimalType annotation in logical_type. 7: optional i32 scale 8: optional i32 precision + /// When the original schema supports field ids, this will save the + /// original field id in the parquet schema 9: optional i32 field_id; + /// The logical type of this SchemaElement + /// + /// LogicalType replaces ConvertedType, but ConvertedType is still required + /// for some logical types to ensure forward-compatibility in format v1. 
10: optional LogicalType logical_type } ); @@ -107,31 +131,30 @@ union EncryptionAlgorithm { #[cfg(feature = "encryption")] thrift_struct!( /// Crypto metadata for files with encrypted footer -pub(crate) struct FileCryptoMetaData { +pub(crate) struct FileCryptoMetaData<'a> { /// Encryption algorithm. This field is only used for files /// with encrypted footer. Files with plaintext footer store algorithm id /// inside footer (FileMetaData structure). 1: required EncryptionAlgorithm encryption_algorithm - /** Retrieval metadata of key used for encryption of footer, - * and (possibly) columns **/ - 2: optional binary key_metadata + /// Retrieval metadata of key used for encryption of footer, + /// and (possibly) columns. + 2: optional binary<'a> key_metadata } ); // the following are only used internally so are private thrift_struct!( struct FileMetaData<'a> { - /** Version of this file **/ 1: required i32 version 2: required list<'a> schema; 3: required i64 num_rows 4: required list<'a> row_groups 5: optional list key_value_metadata - 6: optional string created_by + 6: optional string<'a> created_by 7: optional list column_orders; 8: optional EncryptionAlgorithm encryption_algorithm - 9: optional binary footer_signing_key_metadata + 9: optional binary<'a> footer_signing_key_metadata } ); @@ -165,7 +188,7 @@ struct ColumnChunk<'a> { #[cfg(not(feature = "encryption"))] thrift_struct!( struct ColumnChunk<'a> { - 1: optional string file_path + 1: optional string<'a> file_path 2: required i64 file_offset = 0 3: optional ColumnMetaData<'a> meta_data 4: optional i64 offset_index_offset @@ -178,7 +201,7 @@ struct ColumnChunk<'a> { type CompressionCodec = Compression; thrift_struct!( struct ColumnMetaData<'a> { - 1: required Type type_ + 1: required Type r#type 2: required list encodings // we don't expose path_in_schema so skip //3: required list path_in_schema @@ -215,9 +238,7 @@ struct BoundingBox { thrift_struct!( struct GeospatialStatistics { - /** A bounding box of geospatial instances */ 1: optional BoundingBox bbox; - /** Geospatial type codes of all instances, or an empty list if not known */ 2: optional list geospatial_types; } ); @@ -260,6 +281,14 @@ fn convert_row_group( row_group: RowGroup, schema_descr: Arc, ) -> Result { + if schema_descr.num_columns() != row_group.columns.len() { + return Err(general_err!( + "Column count mismatch. Schema has {} columns while Row Group has {}", + schema_descr.num_columns(), + row_group.columns.len() + )); + } + let num_rows = row_group.num_rows; let sorting_columns = row_group.sorting_columns; let total_byte_size = row_group.total_byte_size; @@ -299,7 +328,7 @@ fn convert_column( return Err(general_err!("Expected to have column metadata")); } let col_metadata = column.meta_data.unwrap(); - let column_type = col_metadata.type_; + let column_type = col_metadata.r#type; let encodings = col_metadata.encodings; let compression = col_metadata.codec; let file_path = column.file_path.map(|v| v.to_owned()); @@ -643,6 +672,15 @@ fn row_group_from_encrypted_thrift( } #[cfg(feature = "encryption")] +/// Decodes [`ParquetMetaData`] from the provided bytes, handling metadata that may be encrypted. +/// +/// Typically this is used to decode the metadata from the end of a parquet +/// file. The format of `buf` is the Thrift compact binary protocol, as specified +/// by the [Parquet Spec]. Buffer can be encrypted with AES GCM or AES CTR +/// ciphers as specified in the [Parquet Encryption Spec].
+/// +/// [Parquet Spec]: https://github.com/apache/parquet-format#metadata +/// [Parquet Encryption Spec]: https://parquet.apache.org/docs/file-format/data-pages/encryption/ pub(crate) fn parquet_metadata_with_encryption( file_decryption_properties: Option<&FileDecryptionProperties>, encrypted_footer: bool, @@ -670,7 +708,7 @@ pub(crate) fn parquet_metadata_with_encryption( } let decryptor = get_file_decryptor( t_file_crypto_metadata.encryption_algorithm, - t_file_crypto_metadata.key_metadata.as_ref(), + t_file_crypto_metadata.key_metadata, file_decryption_properties, )?; let footer_decryptor = decryptor.get_footer_decryptor(); @@ -693,7 +731,7 @@ pub(crate) fn parquet_metadata_with_encryption( } } - let file_meta = super::thrift_gen::FileMetaData::read_thrift(&mut prot) + let file_meta = FileMetaData::read_thrift(&mut prot) .map_err(|e| general_err!("Could not parse metadata: {}", e))?; let version = file_meta.version; @@ -710,7 +748,7 @@ pub(crate) fn parquet_metadata_with_encryption( // File has a plaintext footer but encryption algorithm is set let file_decryptor_value = get_file_decryptor( algo, - file_meta.footer_signing_key_metadata.as_ref(), + file_meta.footer_signing_key_metadata, file_decryption_properties, )?; if file_decryption_properties.check_plaintext_footer_integrity() && !encrypted_footer { @@ -769,9 +807,9 @@ pub(crate) fn parquet_metadata_with_encryption( } #[cfg(feature = "encryption")] -pub(super) fn get_file_decryptor( +fn get_file_decryptor( encryption_algorithm: EncryptionAlgorithm, - footer_key_metadata: Option<&Vec>, + footer_key_metadata: Option<&[u8]>, file_decryption_properties: &FileDecryptionProperties, ) -> Result { match encryption_algorithm { @@ -788,7 +826,7 @@ pub(super) fn get_file_decryptor( FileDecryptor::new( file_decryption_properties, - footer_key_metadata.map(|v| v.as_slice()), + footer_key_metadata, aad_file_unique, aad_prefix, ) @@ -803,7 +841,7 @@ pub(super) fn get_file_decryptor( /// the Parquet footer. Page indexes will need to be added later. impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for ParquetMetaData { fn read_thrift(prot: &mut R) -> Result { - let file_meta = super::thrift_gen::FileMetaData::read_thrift(prot)?; + let file_meta = FileMetaData::read_thrift(prot)?; let version = file_meta.version; let num_rows = file_meta.num_rows; @@ -1088,7 +1126,7 @@ impl DataPageHeaderV2 { thrift_struct!( pub(crate) struct PageHeader { /// the type of the page: indicates which of the *_header fields is set - 1: required PageType type_ + 1: required PageType r#type /// Uncompressed page size in bytes (not including this header) 2: required i32 uncompressed_page_size @@ -1184,7 +1222,7 @@ impl PageHeader { )); }; Ok(Self { - type_, + r#type: type_, uncompressed_page_size, compressed_page_size, crc, @@ -1312,7 +1350,7 @@ impl<'a> WriteThrift for FileMeta<'a> { // field 2 is schema. do depth-first traversal of tree, converting to SchemaElement and // writing along the way. 
let root = self.file_metadata.schema_descr().root_schema_ptr(); - let schema_len = num_nodes(&root); + let schema_len = num_nodes(&root)?; writer.write_field_begin(FieldType::List, 2, 1)?; writer.write_list_begin(ElementType::Struct, schema_len)?; // recursively write Type nodes as SchemaElements @@ -1347,6 +1385,16 @@ impl<'a> WriteThrift for FileMeta<'a> { } fn write_schema( + schema: &TypePtr, + writer: &mut ThriftCompactOutputProtocol, +) -> Result<()> { + if !schema.is_group() { + return Err(general_err!("Root schema must be Group type")); + } + write_schema_helper(schema, writer) +} + +fn write_schema_helper( node: &TypePtr, writer: &mut ThriftCompactOutputProtocol, ) -> Result<()> { @@ -1359,7 +1407,7 @@ fn write_schema( precision, } => { let element = SchemaElement { - type_: Some(*physical_type), + r#type: Some(*physical_type), type_length: if *type_length >= 0 { Some(*type_length) } else { @@ -1395,7 +1443,7 @@ fn write_schema( }; let element = SchemaElement { - type_: None, + r#type: None, type_length: None, repetition_type: repetition, name: basic_info.name(), @@ -1418,7 +1466,7 @@ fn write_schema( // Add child elements for a group for field in fields { - write_schema(field, writer)?; + write_schema_helper(field, writer)?; } Ok(()) } @@ -1611,9 +1659,75 @@ impl WriteThriftField for crate::geospatial::bounding_box::BoundingBox { } #[cfg(test)] -mod tests { - use crate::file::metadata::thrift_gen::BoundingBox; +pub(crate) mod tests { + use crate::errors::Result; + use crate::file::metadata::thrift_gen::{ + convert_column, convert_row_group, write_schema, BoundingBox, ColumnChunk, RowGroup, + SchemaElement, + }; + use crate::file::metadata::{ColumnChunkMetaData, RowGroupMetaData}; use crate::parquet_thrift::tests::test_roundtrip; + use crate::parquet_thrift::{ + read_thrift_vec, ElementType, ReadThrift, ThriftCompactOutputProtocol, + ThriftSliceInputProtocol, + }; + use crate::schema::types::{ + num_nodes, parquet_schema_from_array, ColumnDescriptor, SchemaDescriptor, TypePtr, + }; + use std::sync::Arc; + + // for testing. 
decode thrift encoded RowGroup + pub(crate) fn read_row_group( + buf: &mut [u8], + schema_descr: Arc, + ) -> Result { + let mut reader = ThriftSliceInputProtocol::new(buf); + let rg = RowGroup::read_thrift(&mut reader)?; + convert_row_group(rg, schema_descr) + } + + pub(crate) fn read_column_chunk( + buf: &mut [u8], + column_descr: Arc, + ) -> Result { + let mut reader = ThriftSliceInputProtocol::new(buf); + let cc = ColumnChunk::read_thrift(&mut reader)?; + convert_column(cc, column_descr) + } + + pub(crate) fn roundtrip_schema(schema: TypePtr) -> Result { + let num_nodes = num_nodes(&schema)?; + let mut buf = Vec::new(); + let mut writer = ThriftCompactOutputProtocol::new(&mut buf); + + // kick off writing list + writer.write_list_begin(ElementType::Struct, num_nodes)?; + + // write SchemaElements + write_schema(&schema, &mut writer)?; + + let mut prot = ThriftSliceInputProtocol::new(&buf); + let se: Vec = read_thrift_vec(&mut prot)?; + parquet_schema_from_array(se) + } + + pub(crate) fn schema_to_buf(schema: &TypePtr) -> Result> { + let num_nodes = num_nodes(schema)?; + let mut buf = Vec::new(); + let mut writer = ThriftCompactOutputProtocol::new(&mut buf); + + // kick off writing list + writer.write_list_begin(ElementType::Struct, num_nodes)?; + + // write SchemaElements + write_schema(schema, &mut writer)?; + Ok(buf) + } + + pub(crate) fn buf_to_schema_list<'a>(buf: &'a mut Vec) -> Result>> { + let mut prot = ThriftSliceInputProtocol::new(buf.as_mut_slice()); + read_thrift_vec(&mut prot) + } #[test] fn test_bounding_box_roundtrip() { diff --git a/parquet/src/file/metadata/writer.rs b/parquet/src/file/metadata/writer.rs index 6396e454fb09..97d008e17308 100644 --- a/parquet/src/file/metadata/writer.rs +++ b/parquet/src/file/metadata/writer.rs @@ -317,7 +317,7 @@ impl<'a, W: Write> ThriftMetadataWriter<'a, W> { /// 4. Length of encoded `FileMetaData` (4 bytes, little endian) /// 5. Parquet Magic Bytes (4 bytes) /// -/// [`FileMetaData`]: crate::format::FileMetaData +/// [`FileMetaData`]: https://github.com/apache/parquet-format/tree/master?tab=readme-ov-file#metadata /// [`ColumnChunkMetaData`]: crate::file::metadata::ColumnChunkMetaData /// [`ColumnIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md /// [`OffsetIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md @@ -431,7 +431,7 @@ impl<'a, W: Write> ParquetMetaDataWriter<'a, W> { } fn convert_column_indexes(&self) -> Option>>> { - // FIXME(ets): we're converting from ParquetColumnIndex to vec>, + // TODO(ets): we're converting from ParquetColumnIndex to vec>, // but then converting back to ParquetColumnIndex in the end. need to unify this. 
self.metadata .column_index() @@ -543,7 +543,7 @@ impl MetadataObjectWriter { /// Write [`FileMetaData`] in Thrift format, possibly encrypting it if required /// - /// [`FileMetaData`]: crate::format::FileMetaData + /// [`FileMetaData`]: https://github.com/apache/parquet-format/tree/master?tab=readme-ov-file#metadata fn write_file_metadata(&self, file_metadata: &FileMeta, mut sink: impl Write) -> Result<()> { match self.file_encryptor.as_ref() { Some(file_encryptor) if file_encryptor.properties().encrypt_footer() => { @@ -711,11 +711,11 @@ impl MetadataObjectWriter { }) } - fn file_crypto_metadata(file_encryptor: &FileEncryptor) -> Result { + fn file_crypto_metadata(file_encryptor: &'_ FileEncryptor) -> Result> { let properties = file_encryptor.properties(); Ok(FileCryptoMetaData { encryption_algorithm: Self::encryption_algorithm_from_encryptor(file_encryptor), - key_metadata: properties.footer_key_metadata().cloned(), + key_metadata: properties.footer_key_metadata().map(|v| v.as_slice()), }) } diff --git a/parquet/src/file/mod.rs b/parquet/src/file/mod.rs index 976b36dc2358..09036cd7d7b9 100644 --- a/parquet/src/file/mod.rs +++ b/parquet/src/file/mod.rs @@ -100,7 +100,6 @@ #[cfg(feature = "encryption")] pub mod column_crypto_metadata; pub mod metadata; -pub mod page_encoding_stats; pub mod page_index; pub mod properties; pub mod reader; diff --git a/parquet/src/file/page_encoding_stats.rs b/parquet/src/file/page_encoding_stats.rs deleted file mode 100644 index 3f81353e28dd..000000000000 --- a/parquet/src/file/page_encoding_stats.rs +++ /dev/null @@ -1,82 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -//! Per-page encoding information. - -use std::io::Write; - -use crate::basic::{Encoding, PageType}; -use crate::errors::{ParquetError, Result}; -use crate::parquet_thrift::{ - ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, ThriftCompactOutputProtocol, - WriteThrift, WriteThriftField, -}; -use crate::thrift_struct; - -// TODO: This should probably all be moved to thrift_gen -thrift_struct!( -/// PageEncodingStats for a column chunk and data page. -pub struct PageEncodingStats { - 1: required PageType page_type; - 2: required Encoding encoding; - 3: required i32 count; -} -); - -/// Converts Thrift definition into `PageEncodingStats`. -pub fn try_from_thrift( - thrift_encoding_stats: &crate::format::PageEncodingStats, -) -> Result { - let page_type = PageType::try_from(thrift_encoding_stats.page_type)?; - let encoding = Encoding::try_from(thrift_encoding_stats.encoding)?; - let count = thrift_encoding_stats.count; - - Ok(PageEncodingStats { - page_type, - encoding, - count, - }) -} - -/// Converts `PageEncodingStats` into Thrift definition. 
-pub fn to_thrift(encoding_stats: &PageEncodingStats) -> crate::format::PageEncodingStats { - let page_type = crate::format::PageType::from(encoding_stats.page_type); - let encoding = crate::format::Encoding::from(encoding_stats.encoding); - let count = encoding_stats.count; - - crate::format::PageEncodingStats { - page_type, - encoding, - count, - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_page_encoding_stats_from_thrift() { - let stats = PageEncodingStats { - page_type: PageType::DATA_PAGE, - encoding: Encoding::PLAIN, - count: 1, - }; - - assert_eq!(try_from_thrift(&to_thrift(&stats)).unwrap(), stats); - } -} diff --git a/parquet/src/file/page_index/column_index.rs b/parquet/src/file/page_index/column_index.rs index a0893cc9eae9..2aa155a2825d 100644 --- a/parquet/src/file/page_index/column_index.rs +++ b/parquet/src/file/page_index/column_index.rs @@ -17,7 +17,7 @@ //! [`ColumnIndexMetaData`] structures holding decoded [`ColumnIndex`] information //! -//! [`ColumnIndex`]: crate::format::ColumnIndex +//! [`ColumnIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md //! use crate::{ @@ -499,7 +499,12 @@ macro_rules! colidx_enum_func { }}; } -/// index +/// Parsed [`ColumnIndex`] information for a Parquet file. +/// +/// See [`ParquetColumnIndex`] for more information. +/// +/// [`ParquetColumnIndex`]: crate::file::metadata::ParquetColumnIndex +/// [`ColumnIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md #[derive(Debug, Clone, PartialEq)] #[allow(non_camel_case_types)] pub enum ColumnIndexMetaData { diff --git a/parquet/src/file/page_index/index_reader.rs b/parquet/src/file/page_index/index_reader.rs index fbf97ad92cce..fd10b9fe8b3c 100644 --- a/parquet/src/file/page_index/index_reader.rs +++ b/parquet/src/file/page_index/index_reader.rs @@ -54,7 +54,7 @@ pub(crate) fn acc_range(a: Option>, b: Option>) -> Option< /// See [Page Index Documentation] for more details. /// /// [Page Index Documentation]: https://github.com/apache/parquet-format/blob/master/PageIndex.md -/// [`ColumnIndex`]: crate::format::ColumnIndex +/// [`ColumnIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md #[deprecated( since = "55.2.0", note = "Use ParquetMetaDataReader instead; will be removed in 58.0.0" @@ -100,7 +100,7 @@ pub fn read_columns_indexes( /// See [Page Index Documentation] for more details. 
/// /// [Page Index Documentation]: https://github.com/apache/parquet-format/blob/master/PageIndex.md -/// [`OffsetIndex`]: crate::format::OffsetIndex +/// [`OffsetIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md #[deprecated( since = "55.2.0", note = "Use ParquetMetaDataReader instead; will be removed in 58.0.0" diff --git a/parquet/src/file/page_index/offset_index.rs b/parquet/src/file/page_index/offset_index.rs index 30b58ce0acb3..d79da37824c8 100644 --- a/parquet/src/file/page_index/offset_index.rs +++ b/parquet/src/file/page_index/offset_index.rs @@ -44,30 +44,13 @@ pub struct PageLocation { } ); -impl From<&crate::format::PageLocation> for PageLocation { - fn from(value: &crate::format::PageLocation) -> Self { - Self { - offset: value.offset, - compressed_page_size: value.compressed_page_size, - first_row_index: value.first_row_index, - } - } -} - -impl From<&PageLocation> for crate::format::PageLocation { - fn from(value: &PageLocation) -> Self { - Self { - offset: value.offset, - compressed_page_size: value.compressed_page_size, - first_row_index: value.first_row_index, - } - } -} - thrift_struct!( /// [`OffsetIndex`] information for a column chunk. Contains offsets and sizes for each page /// in the chunk. Optionally stores fully decoded page sizes for BYTE_ARRAY columns. /// +/// See [`ParquetOffsetIndex`] for more information. +/// +/// [`ParquetOffsetIndex`]: crate::file::metadata::ParquetOffsetIndex /// [`OffsetIndex`]: https://github.com/apache/parquet-format/blob/master/PageIndex.md pub struct OffsetIndexMetaData { /// Vector of [`PageLocation`] objects, one per page in the chunk. @@ -79,18 +62,6 @@ pub struct OffsetIndexMetaData { ); impl OffsetIndexMetaData { - /// Creates a new [`OffsetIndexMetaData`] from an [`OffsetIndex`]. - /// - /// [`OffsetIndex`]: crate::format::OffsetIndex - #[allow(dead_code)] - pub(crate) fn try_new(index: crate::format::OffsetIndex) -> Result { - let page_locations = index.page_locations.iter().map(|loc| loc.into()).collect(); - Ok(Self { - page_locations, - unencoded_byte_array_data_bytes: index.unencoded_byte_array_data_bytes, - }) - } - /// Vector of [`PageLocation`] objects, one per page in the chunk. pub fn page_locations(&self) -> &Vec { &self.page_locations diff --git a/parquet/src/file/serialized_reader.rs b/parquet/src/file/serialized_reader.rs index b0d64ea76017..c47c118e43bb 100644 --- a/parquet/src/file/serialized_reader.rs +++ b/parquet/src/file/serialized_reader.rs @@ -413,7 +413,7 @@ pub(crate) fn decode_page( _ => buffer, }; - let result = match page_header.type_ { + let result = match page_header.r#type { PageType::DICTIONARY_PAGE => { let dict_header = page_header.dictionary_page_header.as_ref().ok_or_else(|| { ParquetError::General("Missing dictionary page header".to_string()) @@ -458,7 +458,7 @@ pub(crate) fn decode_page( } _ => { // For unknown page type (e.g., INDEX_PAGE), skip and read next. - unimplemented!("Page type {:?} is not supported", page_header.type_) + unimplemented!("Page type {:?} is not supported", page_header.r#type) } }; @@ -894,7 +894,7 @@ impl PageReader for SerializedPageReader { *offset += data_len as u64; *remaining -= data_len as u64; - if header.type_ == PageType::INDEX_PAGE { + if header.r#type == PageType::INDEX_PAGE { continue; } diff --git a/parquet/src/file/statistics.rs b/parquet/src/file/statistics.rs index 38c0d1ff06a0..0c54940fac3b 100644 --- a/parquet/src/file/statistics.rs +++ b/parquet/src/file/statistics.rs @@ -118,156 +118,6 @@ macro_rules! 
statistics_enum_func { }}; } -// FIXME(ets): remove this when done with format changes -/// Converts Thrift definition into `Statistics`. -pub fn from_thrift( - physical_type: Type, - thrift_stats: Option, -) -> Result> { - Ok(match thrift_stats { - Some(stats) => { - // Number of nulls recorded, when it is not available, we just mark it as 0. - // TODO this should be `None` if there is no information about NULLS. - // see https://github.com/apache/arrow-rs/pull/6216/files - let null_count = stats.null_count.unwrap_or(0); - - if null_count < 0 { - return Err(ParquetError::General(format!( - "Statistics null count is negative {null_count}", - ))); - } - - // Generic null count. - let null_count = Some(null_count as u64); - // Generic distinct count (count of distinct values occurring) - let distinct_count = stats.distinct_count.map(|value| value as u64); - // Whether or not statistics use deprecated min/max fields. - let old_format = stats.min_value.is_none() && stats.max_value.is_none(); - // Generic min value as bytes. - let min = if old_format { - stats.min - } else { - stats.min_value - }; - // Generic max value as bytes. - let max = if old_format { - stats.max - } else { - stats.max_value - }; - - fn check_len(min: &Option>, max: &Option>, len: usize) -> Result<()> { - if let Some(min) = min { - if min.len() < len { - return Err(ParquetError::General( - "Insufficient bytes to parse min statistic".to_string(), - )); - } - } - if let Some(max) = max { - if max.len() < len { - return Err(ParquetError::General( - "Insufficient bytes to parse max statistic".to_string(), - )); - } - } - Ok(()) - } - - match physical_type { - Type::BOOLEAN => check_len(&min, &max, 1), - Type::INT32 | Type::FLOAT => check_len(&min, &max, 4), - Type::INT64 | Type::DOUBLE => check_len(&min, &max, 8), - Type::INT96 => check_len(&min, &max, 12), - _ => Ok(()), - }?; - - // Values are encoded using PLAIN encoding definition, except that - // variable-length byte arrays do not include a length prefix. - // - // Instead of using actual decoder, we manually convert values. - let res = match physical_type { - Type::BOOLEAN => Statistics::boolean( - min.map(|data| data[0] != 0), - max.map(|data| data[0] != 0), - distinct_count, - null_count, - old_format, - ), - Type::INT32 => Statistics::int32( - min.map(|data| i32::from_le_bytes(data[..4].try_into().unwrap())), - max.map(|data| i32::from_le_bytes(data[..4].try_into().unwrap())), - distinct_count, - null_count, - old_format, - ), - Type::INT64 => Statistics::int64( - min.map(|data| i64::from_le_bytes(data[..8].try_into().unwrap())), - max.map(|data| i64::from_le_bytes(data[..8].try_into().unwrap())), - distinct_count, - null_count, - old_format, - ), - Type::INT96 => { - // INT96 statistics may not be correct, because comparison is signed - let min = if let Some(data) = min { - assert_eq!(data.len(), 12); - Some(Int96::try_from_le_slice(&data)?) - } else { - None - }; - let max = if let Some(data) = max { - assert_eq!(data.len(), 12); - Some(Int96::try_from_le_slice(&data)?) 
- } else { - None - }; - Statistics::int96(min, max, distinct_count, null_count, old_format) - } - Type::FLOAT => Statistics::float( - min.map(|data| f32::from_le_bytes(data[..4].try_into().unwrap())), - max.map(|data| f32::from_le_bytes(data[..4].try_into().unwrap())), - distinct_count, - null_count, - old_format, - ), - Type::DOUBLE => Statistics::double( - min.map(|data| f64::from_le_bytes(data[..8].try_into().unwrap())), - max.map(|data| f64::from_le_bytes(data[..8].try_into().unwrap())), - distinct_count, - null_count, - old_format, - ), - Type::BYTE_ARRAY => Statistics::ByteArray( - ValueStatistics::new( - min.map(ByteArray::from), - max.map(ByteArray::from), - distinct_count, - null_count, - old_format, - ) - .with_max_is_exact(stats.is_max_value_exact.unwrap_or(false)) - .with_min_is_exact(stats.is_min_value_exact.unwrap_or(false)), - ), - Type::FIXED_LEN_BYTE_ARRAY => Statistics::FixedLenByteArray( - ValueStatistics::new( - min.map(ByteArray::from).map(FixedLenByteArray::from), - max.map(ByteArray::from).map(FixedLenByteArray::from), - distinct_count, - null_count, - old_format, - ) - .with_max_is_exact(stats.is_max_value_exact.unwrap_or(false)) - .with_min_is_exact(stats.is_min_value_exact.unwrap_or(false)), - ), - }; - - Some(res) - } - None => None, - }) -} - /// Converts Thrift definition into `Statistics`. pub(crate) fn from_thrift_page_stats( physical_type: Type, @@ -417,56 +267,6 @@ pub(crate) fn from_thrift_page_stats( }) } -// FIXME(ets): remove when done with format changes -/// Convert Statistics into Thrift definition. -pub fn to_thrift(stats: Option<&Statistics>) -> Option { - let stats = stats?; - - // record null count if it can fit in i64 - let null_count = stats - .null_count_opt() - .and_then(|value| i64::try_from(value).ok()); - - // record distinct count if it can fit in i64 - let distinct_count = stats - .distinct_count_opt() - .and_then(|value| i64::try_from(value).ok()); - - let mut thrift_stats = crate::format::Statistics { - max: None, - min: None, - null_count, - distinct_count, - max_value: None, - min_value: None, - is_max_value_exact: None, - is_min_value_exact: None, - }; - - // Get min/max if set. - let (min, max, min_exact, max_exact) = ( - stats.min_bytes_opt().map(|x| x.to_vec()), - stats.max_bytes_opt().map(|x| x.to_vec()), - Some(stats.min_is_exact()), - Some(stats.max_is_exact()), - ); - if stats.is_min_max_backwards_compatible() { - // Copy to deprecated min, max values for compatibility with older readers - thrift_stats.min.clone_from(&min); - thrift_stats.max.clone_from(&max); - } - - if !stats.is_min_max_deprecated() { - thrift_stats.min_value = min; - thrift_stats.max_value = max; - } - - thrift_stats.is_min_value_exact = min_exact; - thrift_stats.is_max_value_exact = max_exact; - - Some(thrift_stats) -} - /// Convert Statistics into Thrift definition. 
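///
/// A minimal round-trip sketch (illustrative only; it assumes a `Statistics` value
/// `stats` is in scope and that these crate-internal helpers are visible, mirroring
/// the `check_stats` test below):
///
/// ```ignore
/// // encode to the thrift page-statistics struct, then decode it back
/// let encoded = page_stats_to_thrift(Some(&stats));
/// let decoded = from_thrift_page_stats(stats.physical_type(), encoded)?;
/// assert_eq!(decoded, Some(stats));
/// ```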
pub(crate) fn page_stats_to_thrift(stats: Option<&Statistics>) -> Option { let stats = stats?; @@ -900,7 +700,7 @@ mod tests { #[test] #[should_panic(expected = "General(\"Statistics null count is negative -10\")")] fn test_statistics_negative_null_count() { - let thrift_stats = crate::format::Statistics { + let thrift_stats = PageStatistics { max: None, min: None, null_count: Some(-10), @@ -911,13 +711,16 @@ mod tests { is_min_value_exact: None, }; - from_thrift(Type::INT32, Some(thrift_stats)).unwrap(); + from_thrift_page_stats(Type::INT32, Some(thrift_stats)).unwrap(); } #[test] fn test_statistics_thrift_none() { - assert_eq!(from_thrift(Type::INT32, None).unwrap(), None); - assert_eq!(from_thrift(Type::BYTE_ARRAY, None).unwrap(), None); + assert_eq!(from_thrift_page_stats(Type::INT32, None).unwrap(), None); + assert_eq!( + from_thrift_page_stats(Type::BYTE_ARRAY, None).unwrap(), + None + ); } #[test] @@ -1062,8 +865,11 @@ mod tests { // Helper method to check statistics conversion. fn check_stats(stats: Statistics) { let tpe = stats.physical_type(); - let thrift_stats = to_thrift(Some(&stats)); - assert_eq!(from_thrift(tpe, thrift_stats).unwrap(), Some(stats)); + let thrift_stats = page_stats_to_thrift(Some(&stats)); + assert_eq!( + from_thrift_page_stats(tpe, thrift_stats).unwrap(), + Some(stats) + ); } check_stats(Statistics::boolean( @@ -1199,7 +1005,7 @@ mod tests { fn test_count_encoding_distinct_too_large() { // statistics are stored using i64, so test trying to store larger values let statistics = make_bool_stats(Some(u64::MAX), Some(100)); - let thrift_stats = to_thrift(Some(&statistics)).unwrap(); + let thrift_stats = page_stats_to_thrift(Some(&statistics)).unwrap(); assert_eq!(thrift_stats.distinct_count, None); // can't store u64 max --> null assert_eq!(thrift_stats.null_count, Some(100)); } @@ -1208,18 +1014,24 @@ mod tests { fn test_count_encoding_null_too_large() { // statistics are stored using i64, so test trying to store larger values let statistics = make_bool_stats(Some(100), Some(u64::MAX)); - let thrift_stats = to_thrift(Some(&statistics)).unwrap(); + let thrift_stats = page_stats_to_thrift(Some(&statistics)).unwrap(); assert_eq!(thrift_stats.distinct_count, Some(100)); assert_eq!(thrift_stats.null_count, None); // can' store u64 max --> null } #[test] fn test_count_decoding_null_invalid() { - let tstatistics = crate::format::Statistics { + let tstatistics = PageStatistics { null_count: Some(-42), - ..Default::default() + max: None, + min: None, + distinct_count: None, + max_value: None, + min_value: None, + is_max_value_exact: None, + is_min_value_exact: None, }; - let err = from_thrift(Type::BOOLEAN, Some(tstatistics)).unwrap_err(); + let err = from_thrift_page_stats(Type::BOOLEAN, Some(tstatistics)).unwrap_err(); assert_eq!( err.to_string(), "Parquet error: Statistics null count is negative -42" @@ -1232,14 +1044,14 @@ mod tests { fn statistics_count_test(distinct_count: Option, null_count: Option) { let statistics = make_bool_stats(distinct_count, null_count); - let thrift_stats = to_thrift(Some(&statistics)).unwrap(); + let thrift_stats = page_stats_to_thrift(Some(&statistics)).unwrap(); assert_eq!(thrift_stats.null_count.map(|c| c as u64), null_count); assert_eq!( thrift_stats.distinct_count.map(|c| c as u64), distinct_count ); - let round_tripped = from_thrift(Type::BOOLEAN, Some(thrift_stats)) + let round_tripped = from_thrift_page_stats(Type::BOOLEAN, Some(thrift_stats)) .unwrap() .unwrap(); // TODO: remove branch when we no longer support assuming 
null_count==None in the thrift diff --git a/parquet/src/file/writer.rs b/parquet/src/file/writer.rs index 1ce7ad29123c..a6c13cfa2cb0 100644 --- a/parquet/src/file/writer.rs +++ b/parquet/src/file/writer.rs @@ -1044,10 +1044,11 @@ mod tests { use crate::file::page_index::column_index::ColumnIndexMetaData; use crate::file::properties::EnabledStatistics; use crate::file::serialized_reader::ReadOptionsBuilder; + use crate::file::statistics::{from_thrift_page_stats, page_stats_to_thrift}; use crate::file::{ properties::{ReaderProperties, WriterProperties, WriterVersion}, reader::{FileReader, SerializedFileReader, SerializedPageReader}, - statistics::{from_thrift, to_thrift, Statistics}, + statistics::Statistics, }; use crate::record::{Row, RowAccessor}; use crate::schema::parser::parse_message_type; @@ -1498,8 +1499,11 @@ mod tests { encoding, def_level_encoding, rep_level_encoding, - statistics: from_thrift(physical_type, to_thrift(statistics.as_ref())) - .unwrap(), + statistics: from_thrift_page_stats( + physical_type, + page_stats_to_thrift(statistics.as_ref()), + ) + .unwrap(), } } Page::DataPageV2 { @@ -1528,8 +1532,11 @@ mod tests { def_levels_byte_len, rep_levels_byte_len, is_compressed: compressor.is_some(), - statistics: from_thrift(physical_type, to_thrift(statistics.as_ref())) - .unwrap(), + statistics: from_thrift_page_stats( + physical_type, + page_stats_to_thrift(statistics.as_ref()), + ) + .unwrap(), } } Page::DictionaryPage { @@ -1620,7 +1627,10 @@ mod tests { assert_eq!(&left.buffer(), &right.buffer()); assert_eq!(left.num_values(), right.num_values()); assert_eq!(left.encoding(), right.encoding()); - assert_eq!(to_thrift(left.statistics()), to_thrift(right.statistics())); + assert_eq!( + page_stats_to_thrift(left.statistics()), + page_stats_to_thrift(right.statistics()) + ); } /// Tests roundtrip of i32 data written using `W` and read using `R` @@ -1887,29 +1897,22 @@ mod tests { let metadata = row_group_writer.close().unwrap(); writer.close().unwrap(); - let thrift = metadata.to_thrift(); - let encoded_stats: Vec<_> = thrift - .columns - .into_iter() - .map(|x| x.meta_data.unwrap().statistics.unwrap()) - .collect(); - // decimal - let s = &encoded_stats[0]; + let s = page_stats_to_thrift(metadata.column(0).statistics()).unwrap(); assert_eq!(s.min.as_deref(), Some(1_i32.to_le_bytes().as_ref())); assert_eq!(s.max.as_deref(), Some(3_i32.to_le_bytes().as_ref())); assert_eq!(s.min_value.as_deref(), Some(1_i32.to_le_bytes().as_ref())); assert_eq!(s.max_value.as_deref(), Some(3_i32.to_le_bytes().as_ref())); // i32 - let s = &encoded_stats[1]; + let s = page_stats_to_thrift(metadata.column(1).statistics()).unwrap(); assert_eq!(s.min.as_deref(), Some(1_i32.to_le_bytes().as_ref())); assert_eq!(s.max.as_deref(), Some(3_i32.to_le_bytes().as_ref())); assert_eq!(s.min_value.as_deref(), Some(1_i32.to_le_bytes().as_ref())); assert_eq!(s.max_value.as_deref(), Some(3_i32.to_le_bytes().as_ref())); // u32 - let s = &encoded_stats[2]; + let s = page_stats_to_thrift(metadata.column(2).statistics()).unwrap(); assert_eq!(s.min.as_deref(), None); assert_eq!(s.max.as_deref(), None); assert_eq!(s.min_value.as_deref(), Some(1_i32.to_le_bytes().as_ref())); diff --git a/parquet/src/geospatial/bounding_box.rs b/parquet/src/geospatial/bounding_box.rs index aa6798eb8da2..ce23696afcf3 100644 --- a/parquet/src/geospatial/bounding_box.rs +++ b/parquet/src/geospatial/bounding_box.rs @@ -21,7 +21,6 @@ //! Derived from the parquet format spec: //! //! 
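//!
//! A small usage sketch (grounded in the tests in this module; note the constructor
//! takes `xmin, xmax, ymin, ymax`, with the Z and M ranges attached via builder
//! methods):
//!
//! ```ignore
//! use parquet::geospatial::bounding_box::BoundingBox;
//!
//! // 2D extent, then optional vertical (Z) and measure (M) ranges
//! let bbox = BoundingBox::new(10.0, 40.0, 10.0, 40.0)
//!     .with_zrange(30.0, 80.0)
//!     .with_mrange(200.0, 1600.0);
//! ```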
-use crate::format as parquet; /// A geospatial instance has at least two coordinate dimensions: X and Y for 2D coordinates of each point. /// X represents longitude/easting and Y represents latitude/northing. A geospatial instance can optionally @@ -171,22 +170,6 @@ impl BoundingBox { } } -impl From for parquet::BoundingBox { - /// Converts our internal `BoundingBox` to the Thrift-generated format. - fn from(b: BoundingBox) -> parquet::BoundingBox { - parquet::BoundingBox { - xmin: b.x_range.0.into(), - xmax: b.x_range.1.into(), - ymin: b.y_range.0.into(), - ymax: b.y_range.1.into(), - zmin: b.z_range.map(|z| z.0.into()), - zmax: b.z_range.map(|z| z.1.into()), - mmin: b.m_range.map(|m| m.0.into()), - mmax: b.m_range.map(|m| m.1.into()), - } - } -} - #[cfg(test)] mod tests { use super::*; diff --git a/parquet/src/geospatial/statistics.rs b/parquet/src/geospatial/statistics.rs index 6d7cd030f433..2e99d9c62aff 100644 --- a/parquet/src/geospatial/statistics.rs +++ b/parquet/src/geospatial/statistics.rs @@ -20,7 +20,6 @@ //! This module provides functionality for working with geospatial statistics in Parquet files. //! It includes support for bounding boxes and geospatial statistics in column chunk metadata. -use crate::format::GeospatialStatistics as TGeospatialStatistics; use crate::geospatial::bounding_box::BoundingBox; // ---------------------------------------------------------------------- @@ -70,44 +69,11 @@ impl GeospatialStatistics { } } -/// Converts our internal geospatial statistics to the Thrift-generated format. -pub fn to_thrift(geo_statistics: Option<&GeospatialStatistics>) -> Option { - let geo_stats = geo_statistics?; - let bbox = geo_stats.bbox.clone().map(|bbox| bbox.into()); - let geospatial_types = geo_stats.geospatial_types.clone(); - Some(TGeospatialStatistics::new(bbox, geospatial_types)) -} - #[cfg(test)] mod tests { use super::*; - #[test] - fn test_bbox_to_thrift() { - use crate::format as parquet; - use thrift::OrderedFloat; - - let bbox = BoundingBox::new(0.0, 0.0, 100.0, 100.0); - let thrift_bbox: parquet::BoundingBox = bbox.into(); - assert_eq!(thrift_bbox.xmin, 0.0); - assert_eq!(thrift_bbox.xmax, 0.0); - assert_eq!(thrift_bbox.ymin, 100.0); - assert_eq!(thrift_bbox.ymax, 100.0); - assert_eq!(thrift_bbox.zmin, None); - assert_eq!(thrift_bbox.zmax, None); - assert_eq!(thrift_bbox.mmin, None); - assert_eq!(thrift_bbox.mmax, None); - - let bbox_z = BoundingBox::new(0.0, 0.0, 100.0, 100.0).with_zrange(5.0, 15.0); - let thrift_bbox_z: parquet::BoundingBox = bbox_z.into(); - assert_eq!(thrift_bbox_z.zmin, Some(OrderedFloat(5.0))); - assert_eq!(thrift_bbox_z.zmax, Some(OrderedFloat(15.0))); - - let bbox_m = BoundingBox::new(0.0, 0.0, 100.0, 100.0).with_mrange(10.0, 20.0); - let thrift_bbox_m: parquet::BoundingBox = bbox_m.into(); - assert_eq!(thrift_bbox_m.mmin, Some(OrderedFloat(10.0))); - assert_eq!(thrift_bbox_m.mmax, Some(OrderedFloat(20.0))); - } + // TODO(ets): add round trip to/from parquet tests #[test] fn test_read_geospatial_statistics_from_file() { diff --git a/parquet/src/parquet_macros.rs b/parquet/src/parquet_macros.rs index 5720fd4ce0e7..80dc9658c04f 100644 --- a/parquet/src/parquet_macros.rs +++ b/parquet/src/parquet_macros.rs @@ -20,13 +20,22 @@ // They allow for pasting sections of the Parquet thrift IDL file // into a macro to generate rust structures and implementations. -// TODO(ets): These macros need a good bit of documentation so other developers will be able -// to use them correctly. 
Also need to write a .md file with complete examples of both how
-// to use the macros, and how to implement custom readers and writers when necessary.
+//! This is a collection of macros used to parse Thrift IDL descriptions of structs,
+//! unions, and enums into their corresponding Rust types. These macros will also
+//! generate the code necessary to serialize and deserialize to/from the [Thrift compact]
+//! protocol.
+//!
+//! Further details of how to use them (and other aspects of the Thrift serialization process)
+//! can be found in [THRIFT.md].
+//!
+//! [Thrift compact]: https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#list-and-set
+//! [THRIFT.md]: https://github.com/apache/arrow-rs/blob/main/parquet/THRIFT.md

 #[macro_export]
 #[allow(clippy::crate_in_macro_def)]
-/// macro to generate rust enums from a thrift enum definition
+/// Macro used to generate Rust enums from a Thrift `enum` definition.
+///
+/// When utilizing this macro, the Thrift serialization traits and structs need to be in scope.
 macro_rules! thrift_enum {
     ($(#[$($def_attrs:tt)*])* enum $identifier:ident { $($(#[$($field_attrs:tt)*])* $field_name:ident = $field_value:literal;)* }) => {
         $(#[$($def_attrs)*])*
@@ -69,32 +78,19 @@ macro_rules! thrift_enum {
             Ok(field_id)
         }
     }
-
-        // TODO: remove when we finally get rid of the format module
-        impl TryFrom<crate::format::$identifier> for $identifier {
-            type Error = ParquetError;
-
-            #[allow(deprecated)]
-            fn try_from(value: crate::format::$identifier) -> Result<Self> {
-                Ok(match value {
-                    $(crate::format::$identifier::$field_name => Self::$field_name,)*
-                    _ => return Err(general_err!("Unexpected parquet {}: {}", stringify!($identifier), value.0)),
-                })
-            }
-        }
-
-        impl From<$identifier> for crate::format::$identifier {
-            #[allow(deprecated)]
-            fn from(value: $identifier) -> Self {
-                match value {
-                    $($identifier::$field_name => Self::$field_name,)*
-                }
-            }
-        }
     }
 }

-/// macro to generate rust enums for thrift unions where all fields are typed with empty structs
+/// Macro used to generate Rust enums for Thrift unions in which all variants are typed with empty
+/// structs.
+///
+/// Because the compact protocol does not write any struct type information, these empty structs
+/// become a single `0` (end-of-fields marker) upon serialization. Rather than trying to deserialize
+/// an empty struct, we can instead simply read the `0` and discard it.
+///
+/// The resulting Rust enum will have all unit variants.
+///
+/// When utilizing this macro, the Thrift serialization traits and structs need to be in scope.
 #[macro_export]
 #[allow(clippy::crate_in_macro_def)]
 macro_rules! thrift_union_all_empty {
@@ -153,30 +149,20 @@ macro_rules! thrift_union_all_empty {
             Ok(field_id)
         }
     }
-
-        // TODO: remove when we finally get rid of the format module
-        impl From<crate::format::$identifier> for $identifier {
-            fn from(value: crate::format::$identifier) -> Self {
-                match value {
-                    $(crate::format::$identifier::$field_name(_) => Self::$field_name,)*
-                }
-            }
-        }
-
-        impl From<$identifier> for crate::format::$identifier {
-            fn from(value: $identifier) -> Self {
-                match value {
-                    $($identifier::$field_name => Self::$field_name(Default::default()),)*
-                }
-            }
-        }
     }
 }

-/// macro to generate rust enums for thrift unions where all variants are a mix of unit and tuple types.
-/// this requires modifying the thrift IDL. For variants with empty structs as their type,
-/// delete the typename (i.e. "1: EmptyStruct Var1;" => "1: Var1").
For variants with a non-empty -/// type, put the typename in parens (e.g" "1: Type Var1;" => "1: (Type) Var1;"). +/// Macro used to generate Rust enums for Thrift unions where variants are a mix of unit and +/// tuple types. +/// +/// Use of this macro requires modifying the thrift IDL. For variants with empty structs as their +/// type, delete the typename (i.e. `1: EmptyStruct Var1;` becomes `1: Var1`). For variants with a +/// non-empty type, the typename must be contained within parens (e.g. `1: MyType Var1;` becomes +/// `1: (MyType) Var1;`). +/// +/// This macro allows for specifying lifetime annotations for the resulting `enum` and its fields. +/// +/// When utilizing this macro the Thrift serialization traits and structs need to be in scope. #[macro_export] #[allow(clippy::crate_in_macro_def)] macro_rules! thrift_union { @@ -237,31 +223,11 @@ macro_rules! thrift_union { } } -#[doc(hidden)] -#[macro_export] -macro_rules! __thrift_write_variant_lhs { - ($field_name:ident $field_type:ident, $val:tt) => { - Self::$field_name($val) - }; - ($field_name:ident, $val:tt) => { - Self::$field_name - }; -} - -#[doc(hidden)] -#[macro_export] -macro_rules! __thrift_write_variant_rhs { - ($field_id:literal $field_type:ident, $writer:tt, $val:ident) => { - $val.write_thrift_field($writer, $field_id, 0)? - }; - ($field_id:literal, $writer:tt, $val:tt) => { - $writer.write_empty_struct($field_id, 0)? - }; -} - -/// macro to generate rust structs from a thrift struct definition -/// unlike enum and union, this macro will allow for visibility specifier -/// can also take optional lifetime for struct and elements within it (need e.g.) +/// Macro used to generate Rust structs from a Thrift `struct` definition. +/// +/// This macro allows for specifying lifetime annotations for the resulting `struct` and its fields. +/// +/// When utilizing this macro the Thrift serialization traits and structs need to be in scope. #[macro_export] macro_rules! thrift_struct { ($(#[$($def_attrs:tt)*])* $vis:vis struct $identifier:ident $(< $lt:lifetime >)? { $($(#[$($field_attrs:tt)*])* $field_id:literal : $required_or_optional:ident $field_type:ident $(< $field_lt:lifetime >)? $(< $element_type:ident >)? $field_name:ident $(= $default_value:literal)? $(;)?)* }) => { @@ -323,66 +289,6 @@ macro_rules! thrift_struct { } } -/// only implements ReadThrift for the give IDL struct definition -#[macro_export] -macro_rules! thrift_struct_read_impl { - ($(#[$($def_attrs:tt)*])* $vis:vis struct $identifier:ident $(< $lt:lifetime >)? { $($(#[$($field_attrs:tt)*])* $field_id:literal : $required_or_optional:ident $field_type:ident $(< $field_lt:lifetime >)? $(< $element_type:ident >)? $field_name:ident $(= $default_value:literal)? $(;)?)* }) => { - $(#[cfg_attr(not(doctest), $($def_attrs)*)])* - impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for $identifier $(<$lt>)? { - fn read_thrift(prot: &mut R) -> Result { - $(let mut $field_name: Option<$crate::__thrift_field_type!($field_type $($field_lt)? $($element_type)?)> = None;)* - let mut last_field_id = 0i16; - loop { - let field_ident = prot.read_field_begin(last_field_id)?; - if field_ident.field_type == FieldType::Stop { - break; - } - match field_ident.id { - $($field_id => { - let val = $crate::__thrift_read_field!(prot, field_ident, $field_type $($field_lt)? 
$($element_type)?); - $field_name = Some(val); - })* - _ => { - prot.skip(field_ident.field_type)?; - } - }; - last_field_id = field_ident.id; - } - $($crate::__thrift_result_required_or_optional!($required_or_optional $field_name);)* - Ok(Self { - $($field_name),* - }) - } - } - } -} - -/// only implements WriteThrift for the give IDL struct definition -#[macro_export] -macro_rules! thrift_struct_write_impl { - ($(#[$($def_attrs:tt)*])* $vis:vis struct $identifier:ident $(< $lt:lifetime >)? { $($(#[$($field_attrs:tt)*])* $field_id:literal : $required_or_optional:ident $field_type:ident $(< $field_lt:lifetime >)? $(< $element_type:ident >)? $field_name:ident $(= $default_value:literal)? $(;)?)* }) => { - impl $(<$lt>)? WriteThrift for $identifier $(<$lt>)? { - const ELEMENT_TYPE: ElementType = ElementType::Struct; - - #[allow(unused_assignments)] - fn write_thrift(&self, writer: &mut ThriftCompactOutputProtocol) -> Result<()> { - #[allow(unused_mut, unused_variables)] - let mut last_field_id = 0i16; - $($crate::__thrift_write_required_or_optional_field!($required_or_optional $field_name, $field_id, $field_type, self, writer, last_field_id);)* - writer.write_struct_end() - } - } - - impl $(<$lt>)? WriteThriftField for $identifier $(<$lt>)? { - fn write_thrift_field(&self, writer: &mut ThriftCompactOutputProtocol, field_id: i16, last_field_id: i16) -> Result { - writer.write_field_begin(FieldType::Struct, field_id, last_field_id)?; - self.write_thrift(writer)?; - Ok(field_id) - } - } - } -} - #[doc(hidden)] #[macro_export] macro_rules! __thrift_write_required_or_optional_field { @@ -549,3 +455,25 @@ macro_rules! __thrift_read_variant { Self::$field_name }}; } + +#[doc(hidden)] +#[macro_export] +macro_rules! __thrift_write_variant_lhs { + ($field_name:ident $field_type:ident, $val:tt) => { + Self::$field_name($val) + }; + ($field_name:ident, $val:tt) => { + Self::$field_name + }; +} + +#[doc(hidden)] +#[macro_export] +macro_rules! __thrift_write_variant_rhs { + ($field_id:literal $field_type:ident, $writer:tt, $val:ident) => { + $val.write_thrift_field($writer, $field_id, 0)? + }; + ($field_id:literal, $writer:tt, $val:tt) => { + $writer.write_empty_struct($field_id, 0)? + }; +} diff --git a/parquet/src/parquet_thrift.rs b/parquet/src/parquet_thrift.rs index 6c3c70a8eb34..e27c7d16efdb 100644 --- a/parquet/src/parquet_thrift.rs +++ b/parquet/src/parquet_thrift.rs @@ -15,10 +15,16 @@ // specific language governing permissions and limitations // under the License. -//! experimental replacement for thrift decoder -// this is a copy of TCompactSliceInputProtocol, but modified -// to not allocate byte arrays or strings. -#![allow(dead_code)] +//! Structs used for encoding and decoding Parquet Thrift objects. +//! +//! These include: +//! * [`ThriftCompactInputProtocol`]: Trait implemented by Thrift decoders. +//! * [`ThriftSliceInputProtocol`]: Thrift decoder that takes a slice of bytes as input. +//! * [`ThriftReadInputProtocol`]: Thrift decoder that takes a [`Read`] as input. +//! * [`ReadThrift`]: Trait implemented by serializable objects. +//! * [`ThriftCompactOutputProtocol`]: Thrift encoder. +//! * [`WriteThrift`]: Trait implemented by serializable objects. +//! * [`WriteThriftField`]: Trait implemented by serializable objects that are fields in Thrift structs. 
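+//!
+//! A minimal encode/decode round trip, sketched under the assumption that some type `T`
+//! implements both [`ReadThrift`] and [`WriteThrift`] (these APIs are crate-internal, so
+//! this is illustrative rather than a public example; it mirrors the `test_roundtrip`
+//! helper in this module's tests):
+//!
+//! ```ignore
+//! let mut buf = Vec::<u8>::new();
+//! {
+//!     let mut writer = ThriftCompactOutputProtocol::new(&mut buf);
+//!     val.write_thrift(&mut writer)?;
+//! }
+//! let mut prot = ThriftSliceInputProtocol::new(&buf);
+//! let decoded = T::read_thrift(&mut prot)?;
+//! assert_eq!(val, decoded);
+//! ```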
use std::{ cmp::Ordering, @@ -212,10 +218,10 @@ pub(crate) trait ThriftCompactInputProtocol<'a> { loop { let byte = self.read_byte()?; in_progress |= ((byte & 0x7F) as u64).wrapping_shl(shift); - shift += 7; if byte & 0x80 == 0 { return Ok(in_progress); } + shift += 7; } } @@ -438,11 +444,6 @@ impl<'a> ThriftSliceInputProtocol<'a> { Self { buf } } - /// Re-initialize this reader with a new slice. - pub fn reset_buffer(&mut self, buf: &'a [u8]) { - self.buf = buf; - } - /// Return the current buffer as a slice. pub fn as_slice(&self) -> &'a [u8] { self.buf @@ -638,11 +639,6 @@ impl ThriftCompactOutputProtocol { Self { writer } } - /// Return a reference to the underlying `Write`. - pub(crate) fn inner(&self) -> &W { - &self.writer - } - /// Write a single byte to the output stream. fn write_byte(&mut self, b: u8) -> Result<()> { self.writer.write_all(&[b])?; @@ -1077,13 +1073,13 @@ pub(crate) mod tests { where T: for<'a> ReadThrift<'a, ThriftSliceInputProtocol<'a>> + WriteThrift + PartialEq + Debug, { - let buf = Vec::::new(); - let mut writer = ThriftCompactOutputProtocol::new(buf); - val.write_thrift(&mut writer).unwrap(); - - //println!("serialized: {:x?}", writer.inner()); + let mut buf = Vec::::new(); + { + let mut writer = ThriftCompactOutputProtocol::new(&mut buf); + val.write_thrift(&mut writer).unwrap(); + } - let mut prot = ThriftSliceInputProtocol::new(writer.inner()); + let mut prot = ThriftSliceInputProtocol::new(&buf); let read_val = T::read_thrift(&mut prot).unwrap(); assert_eq!(val, read_val); } diff --git a/parquet/src/schema/types.rs b/parquet/src/schema/types.rs index 66d0621bd9f6..9629e17b4752 100644 --- a/parquet/src/schema/types.rs +++ b/parquet/src/schema/types.rs @@ -1032,7 +1032,8 @@ impl SchemaDescriptor { pub fn new(tp: TypePtr) -> Self { const INIT_SCHEMA_DEPTH: usize = 16; assert!(tp.is_group(), "SchemaDescriptor should take a GroupType"); - let n_leaves = num_leaves(&tp); + // unwrap should be safe since we just asserted tp is a group + let n_leaves = num_leaves(&tp).unwrap(); let mut leaves = Vec::with_capacity(n_leaves); let mut leaf_to_base = Vec::with_capacity(n_leaves); let mut path = Vec::with_capacity(INIT_SCHEMA_DEPTH); @@ -1117,12 +1118,15 @@ impl SchemaDescriptor { } // walk tree and count nodes -pub(crate) fn num_nodes(tp: &TypePtr) -> usize { +pub(crate) fn num_nodes(tp: &TypePtr) -> Result { + if !tp.is_group() { + return Err(general_err!("Root schema must be Group type")); + } let mut n_nodes = 1usize; // count root for f in tp.get_fields().iter() { count_nodes(f, &mut n_nodes); } - n_nodes + Ok(n_nodes) } pub(crate) fn count_nodes(tp: &TypePtr, n_nodes: &mut usize) { @@ -1135,12 +1139,15 @@ pub(crate) fn count_nodes(tp: &TypePtr, n_nodes: &mut usize) { } // do a quick walk of the tree to get proper sizing for SchemaDescriptor arrays -fn num_leaves(tp: &TypePtr) -> usize { +fn num_leaves(tp: &TypePtr) -> Result { + if !tp.is_group() { + return Err(general_err!("Root schema must be Group type")); + } let mut n_leaves = 0usize; for f in tp.get_fields().iter() { count_leaves(f, &mut n_leaves); } - n_leaves + Ok(n_leaves) } fn count_leaves(tp: &TypePtr, n_leaves: &mut usize) { @@ -1206,29 +1213,6 @@ fn build_tree<'a>( } } -/// Method to convert from Thrift. 
-pub fn from_thrift(elements: &[crate::format::SchemaElement]) -> Result { - let mut index = 0; - let mut schema_nodes = Vec::new(); - while index < elements.len() { - let t = from_thrift_helper(elements, index)?; - index = t.0; - schema_nodes.push(t.1); - } - if schema_nodes.len() != 1 { - return Err(general_err!( - "Expected exactly one root node, but found {}", - schema_nodes.len() - )); - } - - if !schema_nodes[0].is_group() { - return Err(general_err!("Expected root node to be a group type")); - } - - Ok(schema_nodes.remove(0)) -} - /// Checks if the logical type is valid. fn check_logical_type(logical_type: &Option) -> Result<()> { if let Some(LogicalType::Integer { bit_width, .. }) = *logical_type { @@ -1241,215 +1225,6 @@ fn check_logical_type(logical_type: &Option) -> Result<()> { Ok(()) } -/// Constructs a new Type from the `elements`, starting at index `index`. -/// The first result is the starting index for the next Type after this one. If it is -/// equal to `elements.len()`, then this Type is the last one. -/// The second result is the result Type. -fn from_thrift_helper( - elements: &[crate::format::SchemaElement], - index: usize, -) -> Result<(usize, TypePtr)> { - // Whether or not the current node is root (message type). - // There is only one message type node in the schema tree. - let is_root_node = index == 0; - - if index >= elements.len() { - return Err(general_err!( - "Index out of bound, index = {}, len = {}", - index, - elements.len() - )); - } - let element = &elements[index]; - - // Check for empty schema - if let (true, None | Some(0)) = (is_root_node, element.num_children) { - let builder = Type::group_type_builder(&element.name); - return Ok((index + 1, Arc::new(builder.build().unwrap()))); - } - - let converted_type = ConvertedType::try_from(element.converted_type)?; - // LogicalType is only present in v2 Parquet files. ConvertedType is always - // populated, regardless of the version of the file (v1 or v2). - let logical_type = element - .logical_type - .as_ref() - .map(|value| LogicalType::from(value.clone())); - - check_logical_type(&logical_type)?; - - let field_id = elements[index].field_id; - match elements[index].num_children { - // From parquet-format: - // The children count is used to construct the nested relationship. - // This field is not set when the element is a primitive type - // Sometimes parquet-cpp sets num_children field to 0 for primitive types, so we - // have to handle this case too. 
- None | Some(0) => { - // primitive type - if elements[index].repetition_type.is_none() { - return Err(general_err!( - "Repetition level must be defined for a primitive type" - )); - } - let repetition = Repetition::try_from(elements[index].repetition_type.unwrap())?; - if let Some(type_) = elements[index].type_ { - let physical_type = PhysicalType::try_from(type_)?; - let length = elements[index].type_length.unwrap_or(-1); - let scale = elements[index].scale.unwrap_or(-1); - let precision = elements[index].precision.unwrap_or(-1); - let name = &elements[index].name; - let builder = Type::primitive_type_builder(name, physical_type) - .with_repetition(repetition) - .with_converted_type(converted_type) - .with_logical_type(logical_type) - .with_length(length) - .with_precision(precision) - .with_scale(scale) - .with_id(field_id); - Ok((index + 1, Arc::new(builder.build()?))) - } else { - let mut builder = Type::group_type_builder(&elements[index].name) - .with_converted_type(converted_type) - .with_logical_type(logical_type) - .with_id(field_id); - if !is_root_node { - // Sometimes parquet-cpp and parquet-mr set repetition level REQUIRED or - // REPEATED for root node. - // - // We only set repetition for group types that are not top-level message - // type. According to parquet-format: - // Root of the schema does not have a repetition_type. - // All other types must have one. - builder = builder.with_repetition(repetition); - } - Ok((index + 1, Arc::new(builder.build().unwrap()))) - } - } - Some(n) => { - let repetition = elements[index] - .repetition_type - .map(Repetition::try_from) - .transpose()?; - - let mut fields = Vec::with_capacity(n as usize); - let mut next_index = index + 1; - for _ in 0..n { - let child_result = from_thrift_helper(elements, next_index)?; - next_index = child_result.0; - fields.push(child_result.1); - } - - let mut builder = Type::group_type_builder(&elements[index].name) - .with_converted_type(converted_type) - .with_logical_type(logical_type) - .with_fields(fields) - .with_id(field_id); - if let Some(rep) = repetition { - // Sometimes parquet-cpp and parquet-mr set repetition level REQUIRED or - // REPEATED for root node. - // - // We only set repetition for group types that are not top-level message - // type. According to parquet-format: - // Root of the schema does not have a repetition_type. - // All other types must have one. - if !is_root_node { - builder = builder.with_repetition(rep); - } - } - Ok((next_index, Arc::new(builder.build().unwrap()))) - } - } -} - -/// Method to convert to Thrift. -pub fn to_thrift(schema: &Type) -> Result> { - if !schema.is_group() { - return Err(general_err!("Root schema must be Group type")); - } - let mut elements: Vec = Vec::new(); - to_thrift_helper(schema, &mut elements); - Ok(elements) -} - -/// Constructs list of `SchemaElement` from the schema using depth-first traversal. -/// Here we assume that schema is always valid and starts with group type. 
-fn to_thrift_helper(schema: &Type, elements: &mut Vec) { - match *schema { - Type::PrimitiveType { - ref basic_info, - physical_type, - type_length, - scale, - precision, - } => { - let element = crate::format::SchemaElement { - type_: Some(physical_type.into()), - type_length: if type_length >= 0 { - Some(type_length) - } else { - None - }, - repetition_type: Some(basic_info.repetition().into()), - name: basic_info.name().to_owned(), - num_children: None, - converted_type: basic_info.converted_type().into(), - scale: if scale >= 0 { Some(scale) } else { None }, - precision: if precision >= 0 { - Some(precision) - } else { - None - }, - field_id: if basic_info.has_id() { - Some(basic_info.id()) - } else { - None - }, - logical_type: basic_info.logical_type().map(|value| value.into()), - }; - - elements.push(element); - } - Type::GroupType { - ref basic_info, - ref fields, - } => { - let repetition = if basic_info.has_repetition() { - Some(basic_info.repetition().into()) - } else { - None - }; - - let element = crate::format::SchemaElement { - type_: None, - type_length: None, - repetition_type: repetition, - name: basic_info.name().to_owned(), - num_children: Some(fields.len() as i32), - converted_type: basic_info.converted_type().into(), - scale: None, - precision: None, - field_id: if basic_info.has_id() { - Some(basic_info.id()) - } else { - None - }, - logical_type: basic_info.logical_type().map(|value| value.into()), - }; - - elements.push(element); - - // Add child elements for a group - for field in fields { - to_thrift_helper(field, elements); - } - } - } -} - -// This is a copy of `from_thrift` above, but rather than `format::SchemaElement` it takes -// the `file::metadata::thrift_gen::SchemaElement<'a>`. - // convert thrift decoded array of `SchemaElement` into this crate's representation of // parquet types. this function consumes `elements`. 
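//
// The element list is a depth-first flattening of the schema tree: the root element
// comes first, and each group's `num_children` says how many of the following
// elements (including nested groups) belong to it. Sketching the recursion used by
// the conversion below (names are illustrative):
//
//   parse(elements, index) -> (next_index, TypePtr)
//     primitive element: consume one element, return index + 1
//     group element:     consume one element, then recurse `num_children` times,
//                        threading `next_index` through each child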
pub(crate) fn parquet_schema_from_array<'a>(elements: Vec>) -> Result { @@ -1526,8 +1301,7 @@ fn schema_from_array_helper<'a>( )); } let repetition = element.repetition_type.unwrap(); - if let Some(type_) = element.type_ { - let physical_type = type_; + if let Some(physical_type) = element.r#type { let length = element.type_length.unwrap_or(-1); let scale = element.scale.unwrap_or(-1); let precision = element.precision.unwrap_or(-1); @@ -1596,7 +1370,10 @@ fn schema_from_array_helper<'a>( mod tests { use super::*; - use crate::schema::parser::parse_message_type; + use crate::{ + file::metadata::thrift_gen::tests::{buf_to_schema_list, roundtrip_schema, schema_to_buf}, + schema::parser::parse_message_type, + }; // TODO: add tests for v2 types @@ -2395,7 +2172,8 @@ mod tests { let schema = Type::primitive_type_builder("col", PhysicalType::INT32) .build() .unwrap(); - let thrift_schema = to_thrift(&schema); + let schema = Arc::new(schema); + let thrift_schema = schema_to_buf(&schema); assert!(thrift_schema.is_err()); if let Err(e) = thrift_schema { assert_eq!( @@ -2455,8 +2233,7 @@ mod tests { } "; let expected_schema = parse_message_type(message_type).unwrap(); - let thrift_schema = to_thrift(&expected_schema).unwrap(); - let result_schema = from_thrift(&thrift_schema).unwrap(); + let result_schema = roundtrip_schema(Arc::new(expected_schema.clone())).unwrap(); assert_eq!(result_schema, Arc::new(expected_schema)); } @@ -2471,8 +2248,7 @@ mod tests { } "; let expected_schema = parse_message_type(message_type).unwrap(); - let thrift_schema = to_thrift(&expected_schema).unwrap(); - let result_schema = from_thrift(&thrift_schema).unwrap(); + let result_schema = roundtrip_schema(Arc::new(expected_schema.clone())).unwrap(); assert_eq!(result_schema, Arc::new(expected_schema)); } @@ -2492,8 +2268,10 @@ mod tests { } "; - let expected_schema = parse_message_type(message_type).unwrap(); - let mut thrift_schema = to_thrift(&expected_schema).unwrap(); + let expected_schema = Arc::new(parse_message_type(message_type).unwrap()); + let mut buf = schema_to_buf(&expected_schema).unwrap(); + let mut thrift_schema = buf_to_schema_list(&mut buf).unwrap(); + // Change all of None to Some(0) for elem in &mut thrift_schema[..] 
{ if elem.num_children.is_none() { @@ -2501,8 +2279,8 @@ mod tests { } } - let result_schema = from_thrift(&thrift_schema).unwrap(); - assert_eq!(result_schema, Arc::new(expected_schema)); + let result_schema = parquet_schema_from_array(thrift_schema).unwrap(); + assert_eq!(result_schema, expected_schema); } // Sometimes parquet-cpp sets repetition level for the root node, which is against @@ -2517,23 +2295,25 @@ mod tests { } "; - let expected_schema = parse_message_type(message_type).unwrap(); - let mut thrift_schema = to_thrift(&expected_schema).unwrap(); - thrift_schema[0].repetition_type = Some(Repetition::REQUIRED.into()); + let expected_schema = Arc::new(parse_message_type(message_type).unwrap()); + let mut buf = schema_to_buf(&expected_schema).unwrap(); + let mut thrift_schema = buf_to_schema_list(&mut buf).unwrap(); + thrift_schema[0].repetition_type = Some(Repetition::REQUIRED); - let result_schema = from_thrift(&thrift_schema).unwrap(); - assert_eq!(result_schema, Arc::new(expected_schema)); + let result_schema = parquet_schema_from_array(thrift_schema).unwrap(); + assert_eq!(result_schema, expected_schema); } #[test] fn test_schema_from_thrift_group_has_no_child() { let message_type = "message schema {}"; - let expected_schema = parse_message_type(message_type).unwrap(); - let mut thrift_schema = to_thrift(&expected_schema).unwrap(); - thrift_schema[0].repetition_type = Some(Repetition::REQUIRED.into()); + let expected_schema = Arc::new(parse_message_type(message_type).unwrap()); + let mut buf = schema_to_buf(&expected_schema).unwrap(); + let mut thrift_schema = buf_to_schema_list(&mut buf).unwrap(); + thrift_schema[0].repetition_type = Some(Repetition::REQUIRED); - let result_schema = from_thrift(&thrift_schema).unwrap(); - assert_eq!(result_schema, Arc::new(expected_schema)); + let result_schema = parquet_schema_from_array(thrift_schema).unwrap(); + assert_eq!(result_schema, expected_schema); } } diff --git a/parquet/src/thrift.rs b/parquet/src/thrift.rs index 4ef5249c129e..2eb91162ac38 100644 --- a/parquet/src/thrift.rs +++ b/parquet/src/thrift.rs @@ -18,10 +18,7 @@ //! Custom thrift definitions pub use thrift::protocol::TCompactOutputProtocol; -use thrift::protocol::{ - TFieldIdentifier, TInputProtocol, TListIdentifier, TMapIdentifier, TMessageIdentifier, - TOutputProtocol, TSetIdentifier, TStructIdentifier, TType, -}; +use thrift::protocol::{TInputProtocol, TOutputProtocol}; /// Reads and writes the struct to Thrift protocols. /// @@ -33,332 +30,57 @@ pub trait TSerializable: Sized { fn write_to_out_protocol(&self, o_prot: &mut T) -> thrift::Result<()>; } -/// A more performant implementation of [`TCompactInputProtocol`] that reads a slice -/// -/// [`TCompactInputProtocol`]: thrift::protocol::TCompactInputProtocol -pub(crate) struct TCompactSliceInputProtocol<'a> { - buf: &'a [u8], - // Identifier of the last field deserialized for a struct. - last_read_field_id: i16, - // Stack of the last read field ids (a new entry is added each time a nested struct is read). - read_field_id_stack: Vec, - // Boolean value for a field. - // Saved because boolean fields and their value are encoded in a single byte, - // and reading the field only occurs after the field id is read. 
- pending_read_bool_value: Option, -} - -impl<'a> TCompactSliceInputProtocol<'a> { - pub fn new(buf: &'a [u8]) -> Self { - Self { - buf, - last_read_field_id: 0, - read_field_id_stack: Vec::with_capacity(16), - pending_read_bool_value: None, - } - } - - pub fn as_slice(&self) -> &'a [u8] { - self.buf - } - - fn read_vlq(&mut self) -> thrift::Result { - let mut in_progress = 0; - let mut shift = 0; - loop { - let byte = self.read_byte()?; - in_progress |= ((byte & 0x7F) as u64).wrapping_shl(shift); - shift += 7; - if byte & 0x80 == 0 { - return Ok(in_progress); - } - } - } - - fn read_zig_zag(&mut self) -> thrift::Result { - let val = self.read_vlq()?; - Ok((val >> 1) as i64 ^ -((val & 1) as i64)) - } - - fn read_list_set_begin(&mut self) -> thrift::Result<(TType, i32)> { - let header = self.read_byte()?; - let element_type = collection_u8_to_type(header & 0x0F)?; - - let possible_element_count = (header & 0xF0) >> 4; - let element_count = if possible_element_count != 15 { - // high bits set high if count and type encoded separately - possible_element_count as i32 - } else { - self.read_vlq()? as _ - }; - - Ok((element_type, element_count)) - } -} - -macro_rules! thrift_unimplemented { - () => { - Err(thrift::Error::Protocol(thrift::ProtocolError { - kind: thrift::ProtocolErrorKind::NotImplemented, - message: "not implemented".to_string(), - })) - }; -} - -impl TInputProtocol for TCompactSliceInputProtocol<'_> { - fn read_message_begin(&mut self) -> thrift::Result { - unimplemented!() - } - - fn read_message_end(&mut self) -> thrift::Result<()> { - thrift_unimplemented!() - } - - fn read_struct_begin(&mut self) -> thrift::Result> { - self.read_field_id_stack.push(self.last_read_field_id); - self.last_read_field_id = 0; - Ok(None) - } - - fn read_struct_end(&mut self) -> thrift::Result<()> { - self.last_read_field_id = self - .read_field_id_stack - .pop() - .expect("should have previous field ids"); - Ok(()) - } - - fn read_field_begin(&mut self) -> thrift::Result { - // we can read at least one byte, which is: - // - the type - // - the field delta and the type - let field_type = self.read_byte()?; - let field_delta = (field_type & 0xF0) >> 4; - let field_type = match field_type & 0x0F { - 0x01 => { - self.pending_read_bool_value = Some(true); - Ok(TType::Bool) - } - 0x02 => { - self.pending_read_bool_value = Some(false); - Ok(TType::Bool) - } - ttu8 => u8_to_type(ttu8), - }?; - - match field_type { - TType::Stop => Ok( - TFieldIdentifier::new::, String, Option>( - None, - TType::Stop, - None, - ), - ), - _ => { - if field_delta != 0 { - self.last_read_field_id = self - .last_read_field_id - .checked_add(field_delta as i16) - .map_or_else( - || { - Err(thrift::Error::Protocol(thrift::ProtocolError { - kind: thrift::ProtocolErrorKind::InvalidData, - message: format!( - "cannot add {} to {}", - field_delta, self.last_read_field_id - ), - })) - }, - Ok, - )?; - } else { - self.last_read_field_id = self.read_i16()?; - }; - - Ok(TFieldIdentifier { - name: None, - field_type, - id: Some(self.last_read_field_id), - }) - } - } - } - - fn read_field_end(&mut self) -> thrift::Result<()> { - Ok(()) - } - - fn read_bool(&mut self) -> thrift::Result { - match self.pending_read_bool_value.take() { - Some(b) => Ok(b), - None => { - let b = self.read_byte()?; - // Previous versions of the thrift specification said to use 0 and 1 inside collections, - // but that differed from existing implementations. 
- // The specification was updated in https://github.com/apache/thrift/commit/2c29c5665bc442e703480bb0ee60fe925ffe02e8. - // At least the go implementation seems to have followed the previously documented values. - match b { - 0x01 => Ok(true), - 0x00 | 0x02 => Ok(false), - unkn => Err(thrift::Error::Protocol(thrift::ProtocolError { - kind: thrift::ProtocolErrorKind::InvalidData, - message: format!("cannot convert {unkn} into bool"), - })), - } - } - } - } - - fn read_bytes(&mut self) -> thrift::Result> { - let len = self.read_vlq()? as usize; - let ret = self.buf.get(..len).ok_or_else(eof_error)?.to_vec(); - self.buf = &self.buf[len..]; - Ok(ret) - } - - fn read_i8(&mut self) -> thrift::Result { - Ok(self.read_byte()? as _) - } - - fn read_i16(&mut self) -> thrift::Result { - Ok(self.read_zig_zag()? as _) - } - - fn read_i32(&mut self) -> thrift::Result { - Ok(self.read_zig_zag()? as _) - } - - fn read_i64(&mut self) -> thrift::Result { - self.read_zig_zag() - } - - fn read_double(&mut self) -> thrift::Result { - let slice = (self.buf[..8]).try_into().unwrap(); - self.buf = &self.buf[8..]; - Ok(f64::from_le_bytes(slice)) - } - - fn read_string(&mut self) -> thrift::Result { - let bytes = self.read_bytes()?; - String::from_utf8(bytes).map_err(From::from) - } - - fn read_list_begin(&mut self) -> thrift::Result { - let (element_type, element_count) = self.read_list_set_begin()?; - Ok(TListIdentifier::new(element_type, element_count)) - } - - fn read_list_end(&mut self) -> thrift::Result<()> { - Ok(()) - } - - fn read_set_begin(&mut self) -> thrift::Result { - thrift_unimplemented!() - } - - fn read_set_end(&mut self) -> thrift::Result<()> { - thrift_unimplemented!() - } - - fn read_map_begin(&mut self) -> thrift::Result { - thrift_unimplemented!() - } - - fn read_map_end(&mut self) -> thrift::Result<()> { - Ok(()) - } - - #[inline] - fn read_byte(&mut self) -> thrift::Result { - let ret = *self.buf.first().ok_or_else(eof_error)?; - self.buf = &self.buf[1..]; - Ok(ret) - } -} - -fn collection_u8_to_type(b: u8) -> thrift::Result { - match b { - // For historical and compatibility reasons, a reader should be capable to deal with both cases. - // The only valid value in the original spec was 2, but due to an widespread implementation bug - // the defacto standard across large parts of the library became 1 instead. - // As a result, both values are now allowed. 
- // https://github.com/apache/thrift/blob/master/doc/specs/thrift-compact-protocol.md#list-and-set - 0x01 | 0x02 => Ok(TType::Bool), - o => u8_to_type(o), - } -} - -fn u8_to_type(b: u8) -> thrift::Result { - match b { - 0x00 => Ok(TType::Stop), - 0x03 => Ok(TType::I08), // equivalent to TType::Byte - 0x04 => Ok(TType::I16), - 0x05 => Ok(TType::I32), - 0x06 => Ok(TType::I64), - 0x07 => Ok(TType::Double), - 0x08 => Ok(TType::String), - 0x09 => Ok(TType::List), - 0x0A => Ok(TType::Set), - 0x0B => Ok(TType::Map), - 0x0C => Ok(TType::Struct), - unkn => Err(thrift::Error::Protocol(thrift::ProtocolError { - kind: thrift::ProtocolErrorKind::InvalidData, - message: format!("cannot convert {unkn} into TType"), - })), - } -} - -fn eof_error() -> thrift::Error { - thrift::Error::Transport(thrift::TransportError { - kind: thrift::TransportErrorKind::EndOfFile, - message: "Unexpected EOF".to_string(), - }) -} - #[cfg(test)] mod tests { - use crate::thrift::{TCompactSliceInputProtocol, TSerializable}; + use crate::{ + basic::Type, + file::page_index::{column_index::ColumnIndexMetaData, index_reader::decode_column_index}, + }; #[test] pub fn read_boolean_list_field_type() { // Boolean collection type encoded as 0x01, as used by this crate when writing. // Values encoded as 1 (true) or 2 (false) as in the current version of the thrift // documentation. - let bytes = vec![0x19, 0x21, 2, 1, 0x19, 8, 0x19, 8, 0x15, 0, 0]; - - let mut protocol = TCompactSliceInputProtocol::new(bytes.as_slice()); - let index = crate::format::ColumnIndex::read_from_in_protocol(&mut protocol).unwrap(); - let expected = crate::format::ColumnIndex { - null_pages: vec![false, true], - min_values: vec![], - max_values: vec![], - boundary_order: crate::format::BoundaryOrder::UNORDERED, - null_counts: None, - repetition_level_histograms: None, - definition_level_histograms: None, + let bytes = vec![ + 0x19, 0x21, 2, 1, 0x19, 0x28, 1, 0, 0, 0x19, 0x28, 1, 1, 0, 0x15, 0, 0, + ]; + let index = decode_column_index(&bytes, Type::BOOLEAN).unwrap(); + + let index = match index { + ColumnIndexMetaData::BOOLEAN(index) => index, + _ => panic!("expected boolean column index"), }; - assert_eq!(&index, &expected); + // should be false, true + assert!(!index.is_null_page(0)); + assert!(index.is_null_page(1)); + assert!(!index.min_value(0).unwrap()); // min is false + assert!(index.max_value(0).unwrap()); // max is true + assert!(index.min_value(1).is_none()); + assert!(index.max_value(1).is_none()); } #[test] pub fn read_boolean_list_alternative_encoding() { // Boolean collection type encoded as 0x02, as allowed by the spec. // Values encoded as 1 (true) or 0 (false) as before the thrift documentation change on 2024-12-13. 
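// (In the compact protocol a short-form list header packs the element count into the
// high nibble and the element type into the low nibble, so the 0x22 below means
// "2 boolean elements" using the alternative 0x02 bool element type, while 0x21 in the
// previous test encodes the same count with the 0x01 encoding.)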
- let bytes = vec![0x19, 0x22, 0, 1, 0x19, 8, 0x19, 8, 0x15, 0, 0]; - - let mut protocol = TCompactSliceInputProtocol::new(bytes.as_slice()); - let index = crate::format::ColumnIndex::read_from_in_protocol(&mut protocol).unwrap(); - let expected = crate::format::ColumnIndex { - null_pages: vec![false, true], - min_values: vec![], - max_values: vec![], - boundary_order: crate::format::BoundaryOrder::UNORDERED, - null_counts: None, - repetition_level_histograms: None, - definition_level_histograms: None, + let bytes = vec![ + 0x19, 0x22, 0, 1, 0x19, 0x28, 1, 0, 0, 0x19, 0x28, 1, 1, 0, 0x15, 0, 0, + ]; + let index = decode_column_index(&bytes, Type::BOOLEAN).unwrap(); + + let index = match index { + ColumnIndexMetaData::BOOLEAN(index) => index, + _ => panic!("expected boolean column index"), }; - assert_eq!(&index, &expected); + // should be false, true + assert!(!index.is_null_page(0)); + assert!(index.is_null_page(1)); + assert!(!index.min_value(0).unwrap()); // min is false + assert!(index.max_value(0).unwrap()); // max is true + assert!(index.min_value(1).is_none()); + assert!(index.max_value(1).is_none()); } } diff --git a/parquet/tests/arrow_reader/io/mod.rs b/parquet/tests/arrow_reader/io/mod.rs index a4f94d3f9e66..2f335d9f7f82 100644 --- a/parquet/tests/arrow_reader/io/mod.rs +++ b/parquet/tests/arrow_reader/io/mod.rs @@ -47,6 +47,7 @@ use parquet::arrow::arrow_reader::{ use parquet::arrow::{ArrowWriter, ProjectionMask}; use parquet::data_type::AsBytes; use parquet::file::metadata::{FooterTail, ParquetMetaData, ParquetOffsetIndex}; +use parquet::file::page_index::offset_index::PageLocation; use parquet::file::properties::WriterProperties; use parquet::file::FOOTER_SIZE; use parquet::schema::types::SchemaDescriptor; @@ -256,7 +257,7 @@ struct TestColumnChunk { dictionary_page_location: Option, /// The location of the data pages in the file - page_locations: Vec, + page_locations: Vec, } /// Information about the pages in a single row group @@ -294,16 +295,11 @@ impl TestRowGroups { let start_offset = start_offset as usize; let end_offset = start_offset + length as usize; - let page_locations = page_locations - .iter() - .map(parquet::format::PageLocation::from) - .collect(); - TestColumnChunk { name: column_name.clone(), location: start_offset..end_offset, dictionary_page_location, - page_locations, + page_locations: page_locations.clone(), } }) .map(|test_column_chunk| { From a6d1d8e019381535b3c1b0ea21538716786ce9cb Mon Sep 17 00:00:00 2001 From: Ed Seidl Date: Thu, 2 Oct 2025 09:39:36 -0700 Subject: [PATCH 15/15] [thrift-remodel] Incorporate changes made to geospatial statistics (#8528) # Which issue does this PR close? **Note: this targets a feature branch, not main** - Part of #5854. # Rationale for this change This brings over changes to handling of geo-spatial statistics introduced by @paleolimbot in #8520. # What changes are included in this PR? Primarily adds documentation and tests to changes already made. The only significant change is adding a `Default` implementation for `EdgeInterpolationAlgorithm`. # Are these changes tested? Yes # Are there any user-facing changes? 
Yes --------- Co-authored-by: Matthijs Brobbel --- parquet/src/basic.rs | 124 ++++++++++++++++++++---- parquet/src/file/metadata/thrift_gen.rs | 2 +- parquet/src/geospatial/statistics.rs | 6 +- parquet/src/schema/printer.rs | 73 +++++++++++++- parquet/tests/geospatial.rs | 123 +++++++++++++++++++++++ 5 files changed, 303 insertions(+), 25 deletions(-) create mode 100644 parquet/tests/geospatial.rs diff --git a/parquet/src/basic.rs b/parquet/src/basic.rs index 350f2b6de1e2..68eebaf5080a 100644 --- a/parquet/src/basic.rs +++ b/parquet/src/basic.rs @@ -349,7 +349,8 @@ pub enum LogicalType { }, /// A geospatial feature in the Well-Known Binary (WKB) format with linear/planar edges interpolation. Geometry { - /// A custom CRS. If unset the defaults to `OGC:CRS84`. + /// A custom CRS. If unset the defaults to `OGC:CRS84`, which means that the geometries + /// must be stored in longitude, latitude based on the WGS84 datum. crs: Option, }, /// A geospatial feature in the WKB format with an explicit (non-linear/non-planar) edges interpolation. @@ -357,7 +358,7 @@ pub enum LogicalType { /// A custom CRS. If unset the defaults to `OGC:CRS84`. crs: Option, /// An optional algorithm can be set to correctly interpret edges interpolation - /// of the geometries. If unset, the algorithm defaults to `SPHERICAL``. + /// of the geometries. If unset, the algorithm defaults to `SPHERICAL`. algorithm: Option, }, /// For forward compatibility; used when an unknown union value is encountered. @@ -456,9 +457,14 @@ impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for LogicalType { } 18 => { let val = GeographyType::read_thrift(&mut *prot)?; + // unset algorithm means SPHERICAL, per the spec: + // https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#geography + let algorithm = val + .algorithm + .unwrap_or(EdgeInterpolationAlgorithm::SPHERICAL); Self::Geography { crs: val.crs.map(|s| s.to_owned()), - algorithm: val.algorithm, + algorithm: Some(algorithm), } } _ => { @@ -928,16 +934,79 @@ enum BoundaryOrder { // ---------------------------------------------------------------------- // Mirrors thrift enum `EdgeInterpolationAlgorithm` -thrift_enum!( -/// Edge interpolation algorithm for Geography logical type -enum EdgeInterpolationAlgorithm { - SPHERICAL = 0; - VINCENTY = 1; - THOMAS = 2; - ANDOYER = 3; - KARNEY = 4; +// this is hand coded to allow for the _Unknown variant (allows this to be forward compatible) + +/// Edge interpolation algorithm for [`LogicalType::Geography`] +#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +#[repr(i32)] +pub enum EdgeInterpolationAlgorithm { + /// Edges are interpolated as geodesics on a sphere. + SPHERICAL = 0, + /// + VINCENTY = 1, + /// Thomas, Paul D. Spheroidal geodesics, reference systems, & local geometry. US Naval Oceanographic Office, 1970 + THOMAS = 2, + /// Thomas, Paul D. Mathematical models for navigation systems. US Naval Oceanographic Office, 1965. + ANDOYER = 3, + /// Karney, Charles FF. "Algorithms for geodesics." 
Journal of Geodesy 87 (2013): 43-55
+    KARNEY = 4,
+    /// Unknown algorithm
+    _Unknown(i32),
+}
+
+impl fmt::Display for EdgeInterpolationAlgorithm {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.write_fmt(format_args!("{0:?}", self))
+    }
+}
+
+impl<'a, R: ThriftCompactInputProtocol<'a>> ReadThrift<'a, R> for EdgeInterpolationAlgorithm {
+    fn read_thrift(prot: &mut R) -> Result<Self> {
+        let val = prot.read_i32()?;
+        match val {
+            0 => Ok(Self::SPHERICAL),
+            1 => Ok(Self::VINCENTY),
+            2 => Ok(Self::THOMAS),
+            3 => Ok(Self::ANDOYER),
+            4 => Ok(Self::KARNEY),
+            _ => Ok(Self::_Unknown(val)),
+        }
+    }
+}
+
+impl WriteThrift for EdgeInterpolationAlgorithm {
+    const ELEMENT_TYPE: ElementType = ElementType::I32;
+
+    fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
+        let val: i32 = match *self {
+            Self::SPHERICAL => 0,
+            Self::VINCENTY => 1,
+            Self::THOMAS => 2,
+            Self::ANDOYER => 3,
+            Self::KARNEY => 4,
+            Self::_Unknown(i) => i,
+        };
+        writer.write_i32(val)
+    }
+}
+
+impl WriteThriftField for EdgeInterpolationAlgorithm {
+    fn write_thrift_field<W: Write>(
+        &self,
+        writer: &mut ThriftCompactOutputProtocol<W>,
+        field_id: i16,
+        last_field_id: i16,
+    ) -> Result<i16> {
+        writer.write_field_begin(FieldType::I32, field_id, last_field_id)?;
+        self.write_thrift(writer)?;
+        Ok(field_id)
+    }
+}
+
+impl Default for EdgeInterpolationAlgorithm {
+    fn default() -> Self {
+        Self::SPHERICAL
+    }
+}
-}
-);

 // ----------------------------------------------------------------------
 // Mirrors thrift union `BloomFilterAlgorithm`

 thrift_union_all_empty!(
 /// The algorithm used in Bloom filter.
 union BloomFilterAlgorithm {
-  /** Block-based Bloom filter. **/
+  /// Block-based Bloom filter.
   1: SplitBlockAlgorithm BLOCK;
 }
 );
@@ -957,7 +1026,7 @@ thrift_union_all_empty!(
 /// The hash function used in Bloom filter. This function takes the hash of a column value
 /// using plain encoding.
 union BloomFilterHash {
-  /** xxHash Strategy. **/
+  /// xxHash Strategy.
1: XxHash XXHASH;
 }
 );
@@ -1359,7 +1428,7 @@ impl str::FromStr for LogicalType {
             "GEOMETRY" => Ok(LogicalType::Geometry { crs: None }),
             "GEOGRAPHY" => Ok(LogicalType::Geography {
                 crs: None,
-                algorithm: None,
+                algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
             }),
             other => Err(general_err!("Invalid parquet logical type {}", other)),
         }
@@ -1816,6 +1885,17 @@ mod tests {
             ConvertedType::from(Some(LogicalType::Float16)),
             ConvertedType::NONE
         );
+        assert_eq!(
+            ConvertedType::from(Some(LogicalType::Geometry { crs: None })),
+            ConvertedType::NONE
+        );
+        assert_eq!(
+            ConvertedType::from(Some(LogicalType::Geography {
+                crs: None,
+                algorithm: Some(EdgeInterpolationAlgorithm::default()),
+            })),
+            ConvertedType::NONE
+        );
         assert_eq!(
             ConvertedType::from(Some(LogicalType::Unknown)),
             ConvertedType::NONE
@@ -1897,11 +1977,11 @@ mod tests {
         });
         test_roundtrip(LogicalType::Geography {
             crs: Some("foo".to_owned()),
-            algorithm: None,
+            algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
         });
         test_roundtrip(LogicalType::Geography {
             crs: None,
-            algorithm: None,
+            algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
         });
     }
@@ -2113,7 +2193,15 @@ mod tests {
         check_sort_order(signed, SortOrder::SIGNED);

         // Undefined comparison
-        let undefined = vec![LogicalType::List, LogicalType::Map];
+        let undefined = vec![
+            LogicalType::List,
+            LogicalType::Map,
+            LogicalType::Geometry { crs: None },
+            LogicalType::Geography {
+                crs: None,
+                algorithm: Some(EdgeInterpolationAlgorithm::default()),
+            },
+        ];
         check_sort_order(undefined, SortOrder::UNDEFINED);
     }

diff --git a/parquet/src/file/metadata/thrift_gen.rs b/parquet/src/file/metadata/thrift_gen.rs
index 7a0b32bfe12e..489cb44cd77b 100644
--- a/parquet/src/file/metadata/thrift_gen.rs
+++ b/parquet/src/file/metadata/thrift_gen.rs
@@ -1585,7 +1585,7 @@ impl WriteThrift for crate::geospatial::statistics::GeospatialStatistics {
     fn write_thrift<W: Write>(&self, writer: &mut ThriftCompactOutputProtocol<W>) -> Result<()> {
         let mut last_field_id = 0i16;
-        if let Some(bbox) = self.bbox() {
+        if let Some(bbox) = self.bounding_box() {
             last_field_id = bbox.write_thrift_field(writer, 1, last_field_id)?;
         }
         if let Some(geo_types) = self.geospatial_types() {
diff --git a/parquet/src/geospatial/statistics.rs b/parquet/src/geospatial/statistics.rs
index 2e99d9c62aff..d3287412b143 100644
--- a/parquet/src/geospatial/statistics.rs
+++ b/parquet/src/geospatial/statistics.rs
@@ -58,12 +58,12 @@ impl GeospatialStatistics {
         }
     }

-    /// Return the optional `BoundingBox`.
-    pub fn bbox(&self) -> Option<&BoundingBox> {
+    /// Optional bounding box defining the spatial extent, where `None` represents a lack of information.
+    pub fn bounding_box(&self) -> Option<&BoundingBox> {
         self.bbox.as_ref()
     }

-    /// Return the optional list of geospatial types.
+    /// Optional list of geometry type identifiers, where `None` represents a lack of information.
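+    ///
+    /// The identifiers are ISO WKB geometry type codes (e.g. `1` for Point, with
+    /// `1000`/`2000`/`3000` offsets for the Z, M, and ZM variants), as described in
+    /// the Parquet Geospatial specification.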
diff --git a/parquet/src/schema/printer.rs b/parquet/src/schema/printer.rs
index fd28e13d2edf..0cc5df59f329 100644
--- a/parquet/src/schema/printer.rs
+++ b/parquet/src/schema/printer.rs
@@ -329,9 +329,20 @@ fn print_logical_and_converted(
         LogicalType::Variant {
             specification_version,
         } => format!("VARIANT({specification_version:?})"),
-        LogicalType::Geometry { crs } => format!("GEOMETRY({crs:?})"),
+        LogicalType::Geometry { crs } => {
+            if let Some(crs) = crs {
+                format!("GEOMETRY({crs})")
+            } else {
+                "GEOMETRY".to_string()
+            }
+        }
         LogicalType::Geography { crs, algorithm } => {
-            format!("GEOGRAPHY({crs:?},{algorithm:?})")
+            let algorithm = algorithm.unwrap_or_default();
+            if let Some(crs) = crs {
+                format!("GEOGRAPHY({algorithm}, {crs})")
+            } else {
+                format!("GEOGRAPHY({algorithm})")
+            }
         }
         LogicalType::Unknown => "UNKNOWN".to_string(),
         LogicalType::_Unknown { field_id } => format!("_Unknown({field_id})"),
@@ -454,7 +465,7 @@ mod tests {
 
     use std::sync::Arc;
 
-    use crate::basic::{Repetition, Type as PhysicalType};
+    use crate::basic::{EdgeInterpolationAlgorithm, Repetition, Type as PhysicalType};
     use crate::errors::Result;
     use crate::schema::parser::parse_message_type;
@@ -784,6 +795,62 @@ mod tests {
             .unwrap(),
             "REQUIRED BYTE_ARRAY field [42] (STRING);",
         ),
+        (
+            build_primitive_type(
+                "field",
+                None,
+                PhysicalType::BYTE_ARRAY,
+                Some(LogicalType::Geometry { crs: None }),
+                ConvertedType::NONE,
+                Repetition::REQUIRED,
+            )
+            .unwrap(),
+            "REQUIRED BYTE_ARRAY field (GEOMETRY);",
+        ),
+        (
+            build_primitive_type(
+                "field",
+                None,
+                PhysicalType::BYTE_ARRAY,
+                Some(LogicalType::Geometry {
+                    crs: Some("non-missing CRS".to_string()),
+                }),
+                ConvertedType::NONE,
+                Repetition::REQUIRED,
+            )
+            .unwrap(),
+            "REQUIRED BYTE_ARRAY field (GEOMETRY(non-missing CRS));",
+        ),
+        (
+            build_primitive_type(
+                "field",
+                None,
+                PhysicalType::BYTE_ARRAY,
+                Some(LogicalType::Geography {
+                    crs: None,
+                    algorithm: Some(EdgeInterpolationAlgorithm::default()),
+                }),
+                ConvertedType::NONE,
+                Repetition::REQUIRED,
+            )
+            .unwrap(),
+            "REQUIRED BYTE_ARRAY field (GEOGRAPHY(SPHERICAL));",
+        ),
+        (
+            build_primitive_type(
+                "field",
+                None,
+                PhysicalType::BYTE_ARRAY,
+                Some(LogicalType::Geography {
+                    crs: Some("non-missing CRS".to_string()),
+                    algorithm: Some(EdgeInterpolationAlgorithm::default()),
+                }),
+                ConvertedType::NONE,
+                Repetition::REQUIRED,
+            )
+            .unwrap(),
+            "REQUIRED BYTE_ARRAY field (GEOGRAPHY(SPHERICAL, non-missing CRS));",
+        ),
     ];
 
     types_and_strings.into_iter().for_each(|(field, expected)| {
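A standalone sketch of the GEOGRAPHY formatting rule introduced above: the algorithm always prints (defaulting to SPHERICAL when absent) and the CRS is appended only when present. `format_geography` is an illustrative re-statement over plain strings, not the crate function:

```rust
// Illustrative re-statement of the printer's GEOGRAPHY formatting rule.
fn format_geography(crs: Option<&str>, algorithm: Option<&str>) -> String {
    // The edge interpolation algorithm defaults to SPHERICAL when absent.
    let algorithm = algorithm.unwrap_or("SPHERICAL");
    match crs {
        Some(crs) => format!("GEOGRAPHY({algorithm}, {crs})"),
        None => format!("GEOGRAPHY({algorithm})"),
    }
}

fn main() {
    assert_eq!(format_geography(None, None), "GEOGRAPHY(SPHERICAL)");
    assert_eq!(
        format_geography(Some("srid:5070"), Some("KARNEY")),
        "GEOGRAPHY(KARNEY, srid:5070)"
    );
}
```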
diff --git a/parquet/tests/geospatial.rs b/parquet/tests/geospatial.rs
new file mode 100644
index 000000000000..b3de40491b30
--- /dev/null
+++ b/parquet/tests/geospatial.rs
@@ -0,0 +1,123 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Tests for Geometry and Geography logical types
+use parquet::{
+    basic::{EdgeInterpolationAlgorithm, LogicalType},
+    file::{
+        metadata::ParquetMetaData,
+        reader::{FileReader, SerializedFileReader},
+    },
+    geospatial::bounding_box::BoundingBox,
+};
+use serde_json::Value;
+use std::fs::File;
+
+fn read_metadata(geospatial_test_file: &str) -> ParquetMetaData {
+    let path = format!(
+        "{}/geospatial/{geospatial_test_file}",
+        arrow::util::test_util::parquet_test_data(),
+    );
+    let file = File::open(path).unwrap();
+    let reader = SerializedFileReader::try_from(file).unwrap();
+    reader.metadata().clone()
+}
+
+#[test]
+fn test_read_logical_type() {
+    // Some crs values are short strings
+    let expected_logical_type = [
+        ("crs-default.parquet", LogicalType::Geometry { crs: None }),
+        (
+            "crs-srid.parquet",
+            LogicalType::Geometry {
+                crs: Some("srid:5070".to_string()),
+            },
+        ),
+        (
+            "crs-projjson.parquet",
+            LogicalType::Geometry {
+                crs: Some("projjson:projjson_epsg_5070".to_string()),
+            },
+        ),
+        (
+            "crs-geography.parquet",
+            LogicalType::Geography {
+                crs: None,
+                algorithm: Some(EdgeInterpolationAlgorithm::SPHERICAL),
+            },
+        ),
+    ];
+
+    for (geospatial_file, expected_type) in expected_logical_type {
+        let metadata = read_metadata(geospatial_file);
+        let logical_type = metadata
+            .file_metadata()
+            .schema_descr()
+            .column(1)
+            .logical_type()
+            .unwrap();
+
+        assert_eq!(logical_type, expected_type);
+    }
+
+    // The crs value may also contain arbitrary values (in this case some JSON
+    // a bit too lengthy to type out)
+    let metadata = read_metadata("crs-arbitrary-value.parquet");
+    let logical_type = metadata
+        .file_metadata()
+        .schema_descr()
+        .column(1)
+        .logical_type()
+        .unwrap();
+
+    if let LogicalType::Geometry { crs } = logical_type {
+        let crs_parsed: Value = serde_json::from_str(&crs.unwrap()).unwrap();
+        assert_eq!(crs_parsed.get("id").unwrap().get("code").unwrap(), 5070);
+    } else {
+        panic!("Expected geometry type but got {logical_type:?}");
+    }
+}
+
+#[test]
+fn test_read_geospatial_statistics() {
+    let metadata = read_metadata("geospatial.parquet");
+
+    // geospatial.parquet schema:
+    //   optional binary field_id=-1 group (String);
+    //   optional binary field_id=-1 wkt (String);
+    //   optional binary field_id=-1 geometry (Geometry(crs=));
+    let fields = metadata.file_metadata().schema().get_fields();
+    let logical_type = fields[2].get_basic_info().logical_type().unwrap();
+    assert_eq!(logical_type, LogicalType::Geometry { crs: None });
+
+    let geo_statistics = metadata.row_group(0).column(2).geo_statistics();
+    assert!(geo_statistics.is_some());
+
+    let expected_bbox = BoundingBox::new(10.0, 40.0, 10.0, 40.0)
+        .with_zrange(30.0, 80.0)
+        .with_mrange(200.0, 1600.0);
+    let expected_geospatial_types = vec![
+        1, 2, 3, 4, 5, 6, 7, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 2001, 2002, 2003, 2004,
+        2005, 2006, 2007, 3001, 3002, 3003, 3004, 3005, 3006, 3007,
+    ];
+    assert_eq!(
+        geo_statistics.unwrap().geospatial_types(),
+        Some(&expected_geospatial_types)
+    );
+    assert_eq!(geo_statistics.unwrap().bounding_box(), Some(&expected_bbox));
+}
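For reference, a sketch of how the geospatial type codes asserted above can be decoded, assuming they follow the ISO WKB convention referenced by the Parquet geospatial specification (thousands digit encodes the coordinate dimensions, remainder the base geometry type); `describe` is illustrative and not part of the crate:

```rust
// Illustrative decoder for ISO WKB geometry type codes such as 1, 1001, 3007.
fn describe(code: i32) -> String {
    let base = match code % 1000 {
        1 => "Point",
        2 => "LineString",
        3 => "Polygon",
        4 => "MultiPoint",
        5 => "MultiLineString",
        6 => "MultiPolygon",
        7 => "GeometryCollection",
        _ => "Unknown",
    };
    // 0 => XY, 1000 => XYZ, 2000 => XYM, 3000 => XYZM
    let dims = match code / 1000 {
        0 => "XY",
        1 => "XYZ",
        2 => "XYM",
        3 => "XYZM",
        _ => "?",
    };
    format!("{base} ({dims})")
}

fn main() {
    assert_eq!(describe(1), "Point (XY)");
    assert_eq!(describe(3007), "GeometryCollection (XYZM)");
}
```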