Skip to content

Commit db7dca5

Browse files
committed
implement length() and split_offsets()
1 parent 90039fb commit db7dca5

File tree

1 file changed

+17
-2
lines changed

1 file changed

+17
-2
lines changed

src/iceberg/parquet/parquet_writer.cc

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,14 +90,25 @@ class ParquetWriter::Impl {
9090
return {}; // Already closed
9191
}
9292

93+
auto& metadata = writer_->metadata();
94+
split_offsets_.reserve(metadata->num_row_groups());
95+
for (int i = 0; i < metadata->num_row_groups(); ++i) {
96+
split_offsets_.push_back(metadata->RowGroup(i)->file_offset());
97+
}
9398
ICEBERG_ARROW_RETURN_NOT_OK(writer_->Close());
9499
writer_.reset();
100+
101+
ICEBERG_ARROW_ASSIGN_OR_RETURN(total_bytes_, output_stream_->Tell());
95102
ICEBERG_ARROW_RETURN_NOT_OK(output_stream_->Close());
96103
return {};
97104
}
98105

99106
// True when no Parquet file writer is currently held, e.g. after Close()
// has released it via writer_.reset().
bool Closed() const {
  return !writer_;
}
100107

108+
// Byte count stored in total_bytes_; Close() fills it in from the output
// stream position just before closing the stream.
int64_t length() const {
  return total_bytes_;
}
109+
110+
// Returns a copy of the row group offsets gathered by Close().
std::vector<int64_t> split_offsets() const {
  return split_offsets_;
}
111+
101112
private:
102113
// TODO(gangwu): make memory pool configurable
103114
::arrow::MemoryPool* pool_ = ::arrow::default_memory_pool();
@@ -107,6 +118,10 @@ class ParquetWriter::Impl {
107118
std::shared_ptr<::arrow::io::OutputStream> output_stream_;
108119
// Parquet file writer to write ArrowArray.
109120
std::unique_ptr<::parquet::arrow::FileWriter> writer_;
121+
// Total length of the written Parquet file.
122+
int64_t total_bytes_ = 0;  // Stays 0 until Close() records the stream position.
123+
// Row group start offsets in the Parquet file.
124+
std::vector<int64_t> split_offsets_;
110125
};
111126

112127
ParquetWriter::~ParquetWriter() = default;
@@ -131,14 +146,14 @@ std::optional<int64_t> ParquetWriter::length() {
131146
if (!impl_->Closed()) {
132147
return std::nullopt;
133148
}
134-
return {};
149+
return impl_->length();
135150
}
136151

137152
std::vector<int64_t> ParquetWriter::split_offsets() {
138153
if (!impl_->Closed()) {
139154
return {};
140155
}
141-
return {};
156+
return impl_->split_offsets();
142157
}
143158

144159
void RegisterWriter() {

0 commit comments

Comments
 (0)