1919#include < cstdint>
2020#include < memory>
2121#include < mutex>
22+ #include < optional>
2223#include < sstream>
2324#include < string>
2425#include < utility>
@@ -82,7 +83,7 @@ class ConcreteColumnBuilder : public ColumnBuilder {
8283 ReserveChunksUnlocked (block_index);
8384 }
8485
85- void ReserveChunksUnlocked (int64_t block_index) {
86+ virtual void ReserveChunksUnlocked (int64_t block_index) {
8687 // Create a null Array pointer at the back of the list.
8788 size_t chunk_index = static_cast <size_t >(block_index);
8889 if (chunks_.size () <= chunk_index) {
@@ -232,6 +233,7 @@ class InferringColumnBuilder : public ConcreteColumnBuilder {
232233 Status TryConvertChunk (int64_t chunk_index);
233234 // This must be called unlocked!
234235 void ScheduleConvertChunk (int64_t chunk_index);
236+ void ReserveChunksUnlocked (int64_t block_index) override ;
235237
236238 // CAUTION: ConvertOptions can grow large (if it customizes hundreds or
237239 // thousands of columns), so avoid copying it in each InferringColumnBuilder.
@@ -243,6 +245,9 @@ class InferringColumnBuilder : public ConcreteColumnBuilder {
243245
244246 // The parsers corresponding to each chunk (for reconverting)
245247 std::vector<std::shared_ptr<BlockParser>> parsers_;
248+
249+ // The inference kind for which the current chunks_ were obtained
250+ std::vector<std::optional<InferKind>> chunk_kinds_;
246251};
247252
248253Status InferringColumnBuilder::Init () { return UpdateType (); }
@@ -261,14 +266,20 @@ Status InferringColumnBuilder::TryConvertChunk(int64_t chunk_index) {
261266 std::shared_ptr<BlockParser> parser = parsers_[chunk_index];
262267 InferKind kind = infer_status_.kind ();
263268
264- DCHECK_NE (parser, nullptr );
269+ if (chunks_[chunk_index] && chunk_kinds_[chunk_index] == kind) {
270+ // Already tried, nothing to do
271+ return Status::OK ();
272+ }
273+
274+ DCHECK_NE (parser, nullptr ) << " for chunk_index " << chunk_index;
265275
266276 lock.unlock ();
267277 auto maybe_array = converter->Convert (*parser, col_index_);
268278 lock.lock ();
269279
270280 if (kind != infer_status_.kind ()) {
271281 // infer_kind_ was changed by another task, reconvert
282+ kind = infer_status_.kind ();
272283 lock.unlock ();
273284 ScheduleConvertChunk (chunk_index);
274285 return Status::OK ();
@@ -280,34 +291,45 @@ Status InferringColumnBuilder::TryConvertChunk(int64_t chunk_index) {
280291 // We won't try to reconvert anymore
281292 parsers_[chunk_index].reset ();
282293 }
294+ chunk_kinds_[chunk_index] = kind;
283295 return SetChunkUnlocked (chunk_index, maybe_array);
284296 }
285297
286298 // Conversion failed, try another type
287299 infer_status_.LoosenType (maybe_array.status ());
288300 RETURN_NOT_OK (UpdateType ());
301+ kind = infer_status_.kind ();
289302
290303 // Reconvert past finished chunks
291304 // (unfinished chunks will notice by themselves if they need reconverting)
292305 const auto nchunks = static_cast <int64_t >(chunks_.size ());
306+ std::vector<int64_t > chunks_to_reconvert;
293307 for (int64_t i = 0 ; i < nchunks; ++i) {
294- if (i != chunk_index && chunks_[i]) {
295- // We're assuming the chunk was converted using the wrong type
296- // (which should be true unless the executor reorders tasks)
308+ if (i != chunk_index && chunks_[i] && chunk_kinds_[i] != kind) {
309+ // That chunk was converted using the wrong type
297310 chunks_[i].reset ();
298- lock.unlock ();
299- ScheduleConvertChunk (i);
300- lock.lock ();
311+ chunk_kinds_[i].reset ();
312+ chunks_to_reconvert.push_back (i);
301313 }
302314 }
315+ // Reconvert this chunk too
316+ chunks_to_reconvert.push_back (chunk_index);
303317
304- // Reconvert this chunk
305318 lock.unlock ();
306- ScheduleConvertChunk (chunk_index);
307-
319+ for (auto i : chunks_to_reconvert) {
320+ ScheduleConvertChunk (i);
321+ }
308322 return Status::OK ();
309323}
310324
// Override of ConcreteColumnBuilder::ReserveChunksUnlocked that also keeps
// chunk_kinds_ large enough to be indexed by `block_index`.
//
// The base-class implementation is invoked first; afterwards chunk_kinds_ is
// grown (never shrunk) so that it holds at least `block_index + 1` entries.
// Entries added by the resize are empty optionals, i.e. no inference kind is
// recorded for them yet.
//
// NOTE(review): the "Unlocked" suffix presumably means the caller is expected
// to already hold the builder's mutex -- confirm against the call sites.
325+ void InferringColumnBuilder::ReserveChunksUnlocked (int64_t block_index) {
// Delegate to the base class before touching chunk_kinds_.
326+ ConcreteColumnBuilder::ReserveChunksUnlocked (block_index);
327+ size_t chunk_index = static_cast <size_t >(block_index);
// Only grow: an already large enough vector is left untouched.
328+ if (chunk_kinds_.size () <= chunk_index) {
329+ chunk_kinds_.resize (chunk_index + 1 );
330+ }
331+ }
332+
311333void InferringColumnBuilder::Insert (int64_t block_index,
312334 const std::shared_ptr<BlockParser>& parser) {
313335 // Create a slot for the new chunk and spawn a task to convert it
0 commit comments