@@ -47,7 +47,8 @@ namespace csv {
4747
4848class BlockParser ;
4949
50- class ConcreteColumnBuilder : public ColumnBuilder {
50+ class ConcreteColumnBuilder : public ColumnBuilder ,
51+ public std::enable_shared_from_this<ConcreteColumnBuilder> {
5152 public:
5253 explicit ConcreteColumnBuilder (MemoryPool* pool, std::shared_ptr<TaskGroup> task_group,
5354 int32_t col_index = -1 )
@@ -152,15 +153,17 @@ void NullColumnBuilder::Insert(int64_t block_index,
152153 const int32_t num_rows = parser->num_rows ();
153154 DCHECK_GE (num_rows, 0 );
154155
155- task_group_->Append ([this , block_index, num_rows]() -> Status {
156- std::unique_ptr<ArrayBuilder> builder;
157- RETURN_NOT_OK (MakeBuilder (pool_, type_, &builder));
158- std::shared_ptr<Array> res;
159- RETURN_NOT_OK (builder->AppendNulls (num_rows));
160- RETURN_NOT_OK (builder->Finish (&res));
161-
162- return SetChunk (block_index, res);
163- });
156+ // `self` keeps us alive, `this` allows easy access to derived / protected member vars
157+ task_group_->Append (
158+ [self = shared_from_this (), this , block_index, num_rows]() -> Status {
159+ std::unique_ptr<ArrayBuilder> builder;
160+ RETURN_NOT_OK (MakeBuilder (pool_, type_, &builder));
161+ std::shared_ptr<Array> res;
162+ RETURN_NOT_OK (builder->AppendNulls (num_rows));
163+ RETURN_NOT_OK (builder->Finish (&res));
164+
165+ return SetChunk (block_index, res);
166+ });
164167}
165168
166169// ////////////////////////////////////////////////////////////////////////
@@ -169,11 +172,11 @@ void NullColumnBuilder::Insert(int64_t block_index,
169172class TypedColumnBuilder : public ConcreteColumnBuilder {
170173 public:
171174 TypedColumnBuilder (const std::shared_ptr<DataType>& type, int32_t col_index,
172- const ConvertOptions& options, MemoryPool* pool,
173- const std::shared_ptr<TaskGroup>& task_group)
174- : ConcreteColumnBuilder(pool, task_group, col_index),
175+ std::shared_ptr< ConvertOptions> options, MemoryPool* pool,
176+ std::shared_ptr<TaskGroup> task_group)
177+ : ConcreteColumnBuilder(pool, std::move( task_group) , col_index),
175178 type_ (type),
176- options_(options) {}
179+ options_(std::move( options) ) {}
177180
178181 Status Init ();
179182
@@ -185,13 +188,13 @@ class TypedColumnBuilder : public ConcreteColumnBuilder {
185188 std::shared_ptr<DataType> type_;
186189 // CAUTION: ConvertOptions can grow large (if it customizes hundreds or
187190 // thousands of columns), so avoid copying it in each TypedColumnBuilder.
188- const ConvertOptions& options_;
191+ std::shared_ptr< ConvertOptions> options_;
189192
190193 std::shared_ptr<Converter> converter_;
191194};
192195
// Creates the converter for this column: the result of
// Converter::Make(type_, <options>, pool_) is stored into converter_, then
// Status::OK() is returned.
// NOTE(review): ARROW_ASSIGN_OR_RAISE is defined elsewhere; presumably it
// returns early with the error when Converter::Make fails -- confirm.
193196Status TypedColumnBuilder::Init () {
194- ARROW_ASSIGN_OR_RAISE (converter_, Converter::Make (type_, options_, pool_));
// options_ is now a std::shared_ptr<ConvertOptions> member, so it is
// dereferenced to pass the options object itself to Converter::Make.
// NOTE(review): no null check on options_ is visible before this dereference.
197+ ARROW_ASSIGN_OR_RAISE (converter_, Converter::Make (type_, * options_, pool_));
195198 return Status::OK ();
196199}
197200
@@ -202,7 +205,7 @@ void TypedColumnBuilder::Insert(int64_t block_index,
202205 ReserveChunks (block_index);
203206
204207 // We're careful that all references in the closure outlive the Append() call
205- task_group_->Append ([this , parser, block_index]() -> Status {
208+ task_group_->Append ([self = shared_from_this (), this , parser, block_index]() -> Status {
206209 return SetChunk (block_index, converter_->Convert (*parser, col_index_));
207210 });
208211}
@@ -212,11 +215,11 @@ void TypedColumnBuilder::Insert(int64_t block_index,
212215
213216class InferringColumnBuilder : public ConcreteColumnBuilder {
214217 public:
215- InferringColumnBuilder (int32_t col_index, const ConvertOptions& options,
216- MemoryPool* pool, const std::shared_ptr<TaskGroup>& task_group)
217- : ConcreteColumnBuilder(pool, task_group, col_index),
218- options_ (options),
219- infer_status_(options ) {}
218+ InferringColumnBuilder (int32_t col_index, std::shared_ptr< ConvertOptions> options,
219+ MemoryPool* pool, std::shared_ptr<TaskGroup> task_group)
220+ : ConcreteColumnBuilder(pool, std::move( task_group) , col_index),
221+ options_ (std::move( options) ),
222+ infer_status_(*options_ ) {}
220223
221224 Status Init ();
222225
@@ -237,7 +240,8 @@ class InferringColumnBuilder : public ConcreteColumnBuilder {
237240
238241 // CAUTION: ConvertOptions can grow large (if it customizes hundreds or
239242 // thousands of columns), so avoid copying it in each InferringColumnBuilder.
240- const ConvertOptions& options_;
243+ // However, it needs to be owned because of async task execution, hence shared_ptr.
244+ std::shared_ptr<ConvertOptions> options_;
241245
242246 // Current inference status
243247 InferStatus infer_status_;
@@ -257,7 +261,9 @@ Status InferringColumnBuilder::UpdateType() {
257261}
258262
// Appends a task to task_group_ that calls TryConvertChunk(chunk_index) and
// returns its result.
259263void InferringColumnBuilder::ScheduleConvertChunk (int64_t chunk_index) {
260- task_group_->Append ([this , chunk_index]() { return TryConvertChunk (chunk_index); });
// The closure now also captures `self`, a shared_ptr obtained from
// shared_from_this(), so the builder stays alive for as long as the closure
// exists; `this` is still captured so members can be called directly.
// shared_from_this() requires the object to be owned by a shared_ptr already;
// the ColumnBuilder factories in this file create builders with
// std::make_shared.
264+ task_group_->Append ([self = shared_from_this (), this , chunk_index]() {
265+ return TryConvertChunk (chunk_index);
266+ });
261267}
262268
263269Status InferringColumnBuilder::TryConvertChunk (int64_t chunk_index) {
@@ -361,26 +367,26 @@ Result<std::shared_ptr<ChunkedArray>> InferringColumnBuilder::Finish() {
361367
// Factory overload taking an explicit `type`: constructs a TypedColumnBuilder
// with std::make_shared, calls Init() on it, and returns the pointer.
// NOTE(review): RETURN_NOT_OK is defined elsewhere; presumably it returns the
// failing Status from Init() to the caller -- confirm.
362368Result<std::shared_ptr<ColumnBuilder>> ColumnBuilder::Make (
363369 MemoryPool* pool, const std::shared_ptr<DataType>& type, int32_t col_index,
364- const ConvertOptions& options, const std::shared_ptr<TaskGroup>& task_group) {
365- auto ptr =
366- std::make_shared<TypedColumnBuilder>(type, col_index, options, pool, task_group);
// `options` and `task_group` are now taken by value as shared_ptr and moved
// into the TypedColumnBuilder constructor.
// NOTE(review): TypedColumnBuilder::Init dereferences the stored options
// pointer, and no null check on `options` is visible here.
370+ std::shared_ptr< ConvertOptions> options, std::shared_ptr<TaskGroup> task_group) {
371+ auto ptr = std::make_shared<TypedColumnBuilder>(type, col_index, std::move (options),
372+ pool, std::move ( task_group) );
367373 RETURN_NOT_OK (ptr->Init ());
368374 return ptr;
369375}
370376
// Factory overload without a `type` argument: constructs an
// InferringColumnBuilder with std::make_shared, calls Init() on it, and
// returns the pointer.
// NOTE(review): RETURN_NOT_OK is defined elsewhere; presumably it returns the
// failing Status from Init() to the caller -- confirm.
371377Result<std::shared_ptr<ColumnBuilder>> ColumnBuilder::Make (
372- MemoryPool* pool, int32_t col_index, const ConvertOptions& options,
373- const std::shared_ptr<TaskGroup>& task_group) {
374- auto ptr =
375- std::make_shared<InferringColumnBuilder>(col_index, options, pool, task_group);
// `options` and `task_group` are now taken by value as shared_ptr and moved
// into the InferringColumnBuilder constructor.
// NOTE(review): that constructor initializes infer_status_ from *options_,
// and no null check on `options` is visible here.
378+ MemoryPool* pool, int32_t col_index, std::shared_ptr< ConvertOptions> options,
379+ std::shared_ptr<TaskGroup> task_group) {
380+ auto ptr = std::make_shared<InferringColumnBuilder>(col_index, std::move (options), pool,
381+ std::move ( task_group) );
376382 RETURN_NOT_OK (ptr->Init ());
377383 return ptr;
378384}
379385
// Factory for a NullColumnBuilder: constructs it with std::make_shared from
// `type`, `pool` and `task_group` and returns it directly. Unlike the Make
// overloads, no Init() call is made here.
380386Result<std::shared_ptr<ColumnBuilder>> ColumnBuilder::MakeNull (
381387 MemoryPool* pool, const std::shared_ptr<DataType>& type,
382- const std::shared_ptr<TaskGroup>& task_group) {
383- return std::make_shared<NullColumnBuilder>(type, pool, task_group);
// `task_group` is now taken by value and moved into the constructor.
388+ std::shared_ptr<TaskGroup> task_group) {
389+ return std::make_shared<NullColumnBuilder>(type, pool, std::move ( task_group) );
384390}
385391
386392} // namespace csv
0 commit comments