Skip to content

Commit 6948a29

Browse files
author
nullccxsy
committed
feat: add Metadata method to Reader and its implementations
- Introduced a new virtual method `Metadata()` in the `Reader` class to retrieve file metadata.
- Implemented `Metadata()` in `AvroReader` to return key-value pairs from the Avro file's metadata.
- Implemented `Metadata()` in `ParquetReader` to extract and return key-value pairs from the Parquet file's metadata.
1 parent 0fc573a commit 6948a29

File tree

5 files changed

+59
-0
lines changed

5 files changed

+59
-0
lines changed

src/iceberg/avro/avro_reader.cc

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,24 @@ class AvroReader::Impl {
173173
return arrow_schema;
174174
}
175175

176+
/// \brief Collect the metadata of the opened Avro file as string key-value pairs.
///
/// Each metadata value is converted to a std::string by copying its elements
/// unchanged.
///
/// \return The metadata map, or InvalidArgument if the reader is not opened.
Result<std::unordered_map<std::string, std::string>> Metadata() {
  if (reader_ == nullptr) {
    return InvalidArgument("Reader is not opened");
  }

  // Bind by const reference instead of by value: this avoids deep-copying the
  // whole metadata map when the accessor returns a reference, and is still
  // valid (via lifetime extension) if it returns a value.
  const auto& metadata = reader_->metadata();

  std::unordered_map<std::string, std::string> metadata_map;
  metadata_map.reserve(metadata.size());

  for (const auto& [key, value] : metadata) {
    // Construct the string in place from the value's element range.
    metadata_map.try_emplace(key, value.begin(), value.end());
  }

  return metadata_map;
}
193+
176194
private:
177195
Status InitReadContext() {
178196
context_ = std::make_unique<ReadContext>();
@@ -241,6 +259,10 @@ Result<std::optional<ArrowArray>> AvroReader::Next() { return impl_->Next(); }
241259

242260
Result<ArrowSchema> AvroReader::Schema() { return impl_->Schema(); }
243261

262+
/// \brief Get the metadata of the file by forwarding to the implementation object.
Result<std::unordered_map<std::string, std::string>> AvroReader::Metadata() {
  // Same delegation pattern as Next() and Schema(): all work happens in Impl.
  return impl_->Metadata();
}
265+
244266
Status AvroReader::Open(const ReaderOptions& options) {
245267
impl_ = std::make_unique<Impl>();
246268
return impl_->Open(options);

src/iceberg/avro/avro_reader.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ class ICEBERG_BUNDLE_EXPORT AvroReader : public Reader {
3939

4040
Result<ArrowSchema> Schema() final;
4141

42+
Result<std::unordered_map<std::string, std::string>> Metadata() final;
43+
4244
private:
4345
class Impl;
4446
std::unique_ptr<Impl> impl_;

src/iceberg/file_reader.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ class ICEBERG_EXPORT Reader {
5454

5555
/// \brief Get the schema of the data.
5656
virtual Result<ArrowSchema> Schema() = 0;
57+
58+
/// \brief Get the metadata of the file
59+
virtual Result<std::unordered_map<std::string, std::string>> Metadata() = 0;
5760
};
5861

5962
/// \brief A split of the file to read.

src/iceberg/parquet/parquet_reader.cc

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include <arrow/record_batch.h>
2727
#include <arrow/result.h>
2828
#include <arrow/type.h>
29+
#include <arrow/util/key_value_metadata.h>
2930
#include <parquet/arrow/reader.h>
3031
#include <parquet/arrow/schema.h>
3132
#include <parquet/file_reader.h>
@@ -185,6 +186,31 @@ class ParquetReader::Impl {
185186
return arrow_schema;
186187
}
187188

189+
/// \brief Collect the key-value metadata of the opened Parquet file.
///
/// \return The key-value pairs stored in the file metadata (an empty map when
/// the file has no key-value metadata), or InvalidArgument if the reader is not
/// opened or the file metadata cannot be obtained.
Result<std::unordered_map<std::string, std::string>> Metadata() {
  if (reader_ == nullptr) {
    return InvalidArgument("Reader is not opened");
  }

  const auto metadata = reader_->parquet_reader()->metadata();
  if (!metadata) {
    return InvalidArgument("Failed to get Parquet file metadata");
  }

  // `metadata` stays alive until this function returns, so a const reference to
  // its key-value metadata pointer is safe and avoids a shared_ptr copy.
  const auto& kv_metadata = metadata->key_value_metadata();
  if (!kv_metadata) {
    return std::unordered_map<std::string, std::string>{};
  }

  // Use the same 64-bit index type as the metadata API rather than `int`.
  const int64_t num_entries = kv_metadata->size();

  std::unordered_map<std::string, std::string> metadata_map;
  metadata_map.reserve(num_entries);

  for (int64_t i = 0; i < num_entries; ++i) {
    // try_emplace keeps the first value if a key occurs more than once.
    metadata_map.try_emplace(kv_metadata->key(i), kv_metadata->value(i));
  }

  return metadata_map;
}
213+
188214
private:
189215
Status InitReadContext() {
190216
context_ = std::make_unique<ReadContext>();
@@ -251,6 +277,10 @@ Result<std::optional<ArrowArray>> ParquetReader::Next() { return impl_->Next();
251277

252278
Result<ArrowSchema> ParquetReader::Schema() { return impl_->Schema(); }
253279

280+
/// \brief Get the metadata of the file by forwarding to the implementation object.
Result<std::unordered_map<std::string, std::string>> ParquetReader::Metadata() {
  // Same delegation pattern as Next() and Schema(): all work happens in Impl.
  return impl_->Metadata();
}
283+
254284
Status ParquetReader::Open(const ReaderOptions& options) {
255285
impl_ = std::make_unique<Impl>();
256286
return impl_->Open(options);

src/iceberg/parquet/parquet_reader.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ class ICEBERG_BUNDLE_EXPORT ParquetReader : public Reader {
3939

4040
Result<ArrowSchema> Schema() final;
4141

42+
Result<std::unordered_map<std::string, std::string>> Metadata() final;
43+
4244
private:
4345
class Impl;
4446
std::unique_ptr<Impl> impl_;

0 commit comments

Comments
 (0)