// Patch summary (commentary only; it sits before the first `diff --git` header):
//
// Touches three files: src/iceberg/avro/avro_reader.cc, src/iceberg/avro/avro_reader.h
// and src/iceberg/file_reader.h.
//
// * avro_reader.cc, AvroReader::Impl::Metadata() (new):
//     - returns Invalid("Reader is not opened") when `reader_` is nullptr;
//     - otherwise reads `reader_->metadata()`, reserves `metadata.size()` slots in a
//       local `metadata_map`, and for every entry stores `pair.first` as the key and a
//       std::string built from `pair.second.begin()` / `pair.second.end()` as the value
//       (via insert_or_assign);
//     - returns the populated map.
// * avro_reader.cc, AvroReader::Metadata() (new): forwards to `impl_->Metadata()`.
// * avro_reader.h: declares `Metadata()` as a `final` member of AvroReader.
// * file_reader.h: adds the pure-virtual `Metadata()` to Reader, documented as
//   "Get the metadata of the file."
//
// NOTE(review): template arguments appear to have been stripped from this text
// (`Result> Metadata()`, `std::unordered_map metadata_map`, `std::make_unique()`,
// `std::unique_ptr impl_`) and the hunk line breaks are collapsed, so the patch will
// not apply as shown. The return type is presumably
// Result<std::unordered_map<std::string, std::string>> -- confirm against the
// original commit.
// NOTE(review): AvroReader::Metadata() dereferences `impl_` without a check, like the
// Next()/Schema() wrappers visible in the hunk context; `impl_` is assigned in Open()
// as shown here. Presumably calling it before Open() is a caller error -- TODO confirm.
diff --git a/src/iceberg/avro/avro_reader.cc b/src/iceberg/avro/avro_reader.cc index 048cd4997..64526123f 100644 --- a/src/iceberg/avro/avro_reader.cc +++ b/src/iceberg/avro/avro_reader.cc @@ -173,6 +173,24 @@ class AvroReader::Impl { return arrow_schema; } + Result> Metadata() { + if (reader_ == nullptr) { + return Invalid("Reader is not opened"); + } + + const auto& metadata = reader_->metadata(); + + std::unordered_map metadata_map; + metadata_map.reserve(metadata.size()); + + for (const auto& pair : metadata) { + metadata_map.insert_or_assign(pair.first, + std::string(pair.second.begin(), pair.second.end())); + } + + return metadata_map; + } + private: Status InitReadContext() { context_ = std::make_unique(); @@ -241,6 +259,10 @@ Result> AvroReader::Next() { return impl_->Next(); } Result AvroReader::Schema() { return impl_->Schema(); } +Result> AvroReader::Metadata() { + return impl_->Metadata(); +} + Status AvroReader::Open(const ReaderOptions& options) { impl_ = std::make_unique(); return impl_->Open(options); diff --git a/src/iceberg/avro/avro_reader.h b/src/iceberg/avro/avro_reader.h index 07737bb7b..24f95f5d4 100644 --- a/src/iceberg/avro/avro_reader.h +++ b/src/iceberg/avro/avro_reader.h @@ -39,6 +39,8 @@ class ICEBERG_BUNDLE_EXPORT AvroReader : public Reader { Result Schema() final; + Result> Metadata() final; + private: class Impl; std::unique_ptr impl_; diff --git a/src/iceberg/file_reader.h b/src/iceberg/file_reader.h index 8a59e33fe..d25a5e451 100644 --- a/src/iceberg/file_reader.h +++ b/src/iceberg/file_reader.h @@ -54,6 +54,9 @@ class ICEBERG_EXPORT Reader { /// \brief Get the schema of the data. virtual Result Schema() = 0; + + /// \brief Get the metadata of the file. + virtual Result> Metadata() = 0; }; /// \brief A split of the file to read. 
// Patch summary (commentary only; it sits outside any hunk, before the next
// `diff --git` header):
//
// Touches two files: src/iceberg/parquet/parquet_reader.cc and
// src/iceberg/parquet/parquet_reader.h.
//
// * parquet_reader.cc: adds one `#include` line (the header name is missing from this
//   text).
// * parquet_reader.cc, ParquetReader::Impl::Metadata() (new):
//     - returns Invalid("Reader is not opened") when `reader_` is nullptr;
//     - fetches `reader_->parquet_reader()->metadata()` and returns
//       Invalid("Failed to get Parquet file metadata") when that is null;
//     - when `metadata->key_value_metadata()` is null, returns an empty map instead of
//       an error;
//     - otherwise fills a local `metadata_map` through
//       `kv_metadata->ToUnorderedMap(&metadata_map)` and returns it.
// * parquet_reader.cc, ParquetReader::Metadata() (new): forwards to `impl_->Metadata()`.
// * parquet_reader.h: declares `Metadata()` as a `final` member of ParquetReader.
//
// NOTE(review): template arguments and include targets appear to have been stripped
// from this text (`Result> Metadata()`, `std::unordered_map{}`, bare `#include`) and
// the hunk line breaks are collapsed, so the patch will not apply as shown. The return
// type is presumably Result<std::unordered_map<std::string, std::string>> -- confirm
// against the original commit.
// NOTE(review): ParquetReader::Metadata() dereferences `impl_` without a check, like
// the Next()/Schema() wrappers visible in the hunk context; `impl_` is assigned in
// Open() as shown here. Presumably calling it before Open() is a caller error -- TODO
// confirm.
diff --git a/src/iceberg/parquet/parquet_reader.cc b/src/iceberg/parquet/parquet_reader.cc index 4c86802b3..e57b98e87 100644 --- a/src/iceberg/parquet/parquet_reader.cc +++ b/src/iceberg/parquet/parquet_reader.cc @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -185,6 +186,27 @@ class ParquetReader::Impl { return arrow_schema; } + Result> Metadata() { + if (reader_ == nullptr) { + return Invalid("Reader is not opened"); + } + + auto metadata = reader_->parquet_reader()->metadata(); + if (!metadata) { + return Invalid("Failed to get Parquet file metadata"); + } + + const auto& kv_metadata = metadata->key_value_metadata(); + if (!kv_metadata) { + return std::unordered_map{}; + } + + std::unordered_map metadata_map; + kv_metadata->ToUnorderedMap(&metadata_map); + + return metadata_map; + } + private: Status InitReadContext() { context_ = std::make_unique(); @@ -251,6 +273,10 @@ Result> ParquetReader::Next() { return impl_->Next(); Result ParquetReader::Schema() { return impl_->Schema(); } +Result> ParquetReader::Metadata() { + return impl_->Metadata(); +} + Status ParquetReader::Open(const ReaderOptions& options) { impl_ = std::make_unique(); return impl_->Open(options); diff --git a/src/iceberg/parquet/parquet_reader.h b/src/iceberg/parquet/parquet_reader.h index 23d34dfa9..0604230c8 100644 --- a/src/iceberg/parquet/parquet_reader.h +++ b/src/iceberg/parquet/parquet_reader.h @@ -39,6 +39,8 @@ class ICEBERG_BUNDLE_EXPORT ParquetReader : public Reader { Result Schema() final; + Result> Metadata() final; + private: class Impl; std::unique_ptr impl_;