@@ -90,14 +90,25 @@ class ParquetWriter::Impl {
9090 return {}; // Already closed
9191 }
9292
93+ auto & metadata = writer_->metadata ();
94+ split_offsets_.reserve (metadata->num_row_groups ());
95+ for (int i = 0 ; i < metadata->num_row_groups (); ++i) {
96+ split_offsets_.push_back (metadata->RowGroup (i)->file_offset ());
97+ }
9398 ICEBERG_ARROW_RETURN_NOT_OK (writer_->Close ());
9499 writer_.reset ();
100+
101+ ICEBERG_ARROW_ASSIGN_OR_RETURN (total_bytes_, output_stream_->Tell ());
95102 ICEBERG_ARROW_RETURN_NOT_OK (output_stream_->Close ());
96103 return {};
97104 }
98105
99106 bool Closed () const { return writer_ == nullptr ; }
100107
108+ int64_t length () const { return total_bytes_; }
109+
110+ std::vector<int64_t > split_offsets () const { return split_offsets_; }
111+
101112 private:
102113 // TODO(gangwu): make memory pool configurable
103114 ::arrow::MemoryPool* pool_ = ::arrow::default_memory_pool();
@@ -107,6 +118,10 @@ class ParquetWriter::Impl {
107118 std::shared_ptr<::arrow::io::OutputStream> output_stream_;
108119 // Parquet file writer to write ArrowArray.
109120 std::unique_ptr<::parquet::arrow::FileWriter> writer_;
// Total length in bytes of the written Parquet file, taken from the output
// stream position in Close(). Zero-initialized because Close() resets `writer_`
// before this value is assigned; if that assignment step fails, Closed() is
// already true and length() must not read an indeterminate value.
int64_t total_bytes_ = 0;
// File offset of each row group, as reported by the writer's metadata in
// Close(). Empty until Close() has populated it.
std::vector<int64_t> split_offsets_;
110125};
111126
112127ParquetWriter::~ParquetWriter () = default ;
@@ -131,14 +146,14 @@ std::optional<int64_t> ParquetWriter::length() {
131146 if (!impl_->Closed ()) {
132147 return std::nullopt ;
133148 }
134- return {} ;
149+ return impl_-> length () ;
135150}
136151
137152std::vector<int64_t > ParquetWriter::split_offsets () {
138153 if (!impl_->Closed ()) {
139154 return {};
140155 }
141- return {} ;
156+ return impl_-> split_offsets () ;
142157}
143158
144159void RegisterWriter () {
0 commit comments