@@ -98,12 +98,20 @@ Status VStatisticsIterator::next_batch(Block* block) {
9898 return Status::EndOfFile (" End of VStatisticsIterator" );
9999}
100100
101+ // Build the block using the output schema, which contains only the columns
102+ // the caller requested (return_columns). Delete predicate columns are excluded
103+ // because SegmentIterator handles them independently:
104+ // - _init_current_block() skips predicate columns (including delete predicates)
105+ // via the _is_pred_column[cid] check, so it never accesses the block by those positions.
106+ // - _output_non_pred_columns() checks loc < block->columns() before filling any column,
107+ // so delete predicate columns (whose loc is >= block->columns()) are simply skipped.
108+ // - Delete predicate evaluation happens entirely through _current_return_columns and
109+ // _evaluate_short_circuit_predicate(), which are independent of the block structure.
101110Status VMergeIteratorContext::block_reset (const std::shared_ptr<Block>& block) {
102111 if (!block->columns ()) {
103- const Schema& schema = _iter->schema ();
104- const auto & column_ids = schema.column_ids ();
105- for (size_t i = 0 ; i < schema.num_column_ids (); ++i) {
106- auto column_desc = schema.column (column_ids[i]);
112+ const auto & column_ids = _output_schema->column_ids ();
113+ for (size_t i = 0 ; i < _output_schema->num_column_ids (); ++i) {
114+ auto column_desc = _output_schema->column (column_ids[i]);
107115 auto data_type = Schema::get_data_type_ptr (*column_desc);
108116 if (data_type == nullptr ) {
109117 return Status::RuntimeError (" invalid data type" );
@@ -143,9 +151,15 @@ bool VMergeIteratorContext::compare(const VMergeIteratorContext& rhs) const {
143151 return result;
144152}
145153
154+ // Copy rows from the internal _block to the destination block.
155+ // Both blocks are built with the output schema (return_columns only), so they
156+ // have the same number of columns. We iterate over _output_schema->num_column_ids()
157+ // columns to copy from src to dst.
146158Status VMergeIteratorContext::copy_rows (Block* block, bool advanced) {
147159 Block& src = *_block;
148160 Block& dst = *block;
161+ DCHECK_EQ (src.columns (), _output_schema->num_column_ids ());
162+ DCHECK_EQ (dst.columns (), _output_schema->num_column_ids ());
149163 if (_cur_batch_num == 0 ) {
150164 return Status::OK ();
151165 }
@@ -154,7 +168,7 @@ Status VMergeIteratorContext::copy_rows(Block* block, bool advanced) {
154168 size_t start = _index_in_block - _cur_batch_num + 1 - advanced;
155169
156170 RETURN_IF_CATCH_EXCEPTION ({
157- for (size_t i = 0 ; i < _num_columns ; ++i) {
171+ for (size_t i = 0 ; i < _output_schema-> num_column_ids () ; ++i) {
158172 auto & s_col = src.get_by_position (i);
159173 auto & d_col = dst.get_by_position (i);
160174
@@ -344,13 +358,12 @@ Status VMergeIterator::init(const StorageReadOptions& opts) {
344358 if (_origin_iters.empty ()) {
345359 return Status::OK ();
346360 }
347- _schema = &(_origin_iters[0 ]->schema ());
348361 _record_rowids = opts.record_rowids ;
349362
350363 for (auto & iter : _origin_iters) {
351- auto ctx = std::make_shared<VMergeIteratorContext>(std::move (iter), _sequence_id_idx,
352- _is_unique, _is_reverse,
353- opts.read_orderby_key_columns );
364+ auto ctx = std::make_shared<VMergeIteratorContext>(
365+ std::move (iter), _sequence_id_idx, _is_unique, _is_reverse,
366+ opts.read_orderby_key_columns , _output_schema );
354367 RETURN_IF_ERROR (ctx->init (opts));
355368 if (!ctx->valid ()) {
356369 continue ;
@@ -366,20 +379,26 @@ Status VMergeIterator::init(const StorageReadOptions& opts) {
366379}
367380
368381// VUnionIterator will read data from input iterator one by one.
382+ // Unlike VMergeIterator, VUnionIterator does NOT have its own internal block or copy_rows().
383+ // It passes the caller's block directly to the underlying SegmentIterator via next_batch(),
384+ // so there is no input-schema vs output-schema mismatch issue here.
385+ // The output_schema parameter is accepted only so that schema() can return the output schema
386+ // consistently with VMergeIterator.
369387class VUnionIterator : public RowwiseIterator {
370388public:
371389 // Ownership of the iterators is transferred to this class.
372390 // This class deletes all iterators when it is destroyed.
373391 // The client must not use the iterators afterwards.
374- VUnionIterator (std::vector<RowwiseIteratorUPtr>&& v) : _origin_iters(std::move(v)) {}
392+ VUnionIterator (std::vector<RowwiseIteratorUPtr>&& v, SchemaSPtr output_schema)
393+ : _output_schema(std::move(output_schema)), _origin_iters(std::move(v)) {}
375394
376395 ~VUnionIterator () override = default ;
377396
378397 Status init (const StorageReadOptions& opts) override ;
379398
380399 Status next_batch (Block* block) override ;
381400
382- const Schema& schema () const override { return *_schema ; }
401+ const Schema& schema () const override { return *_output_schema ; }
383402
384403 Status current_block_row_locations (std::vector<RowLocation>* locations) override ;
385404
@@ -390,7 +409,7 @@ class VUnionIterator : public RowwiseIterator {
390409 }
391410
392411private:
393- const Schema* _schema = nullptr ;
412+ const SchemaSPtr _output_schema ;
394413 RowwiseIteratorUPtr _cur_iter = nullptr ;
395414 StorageReadOptions _read_options;
396415 std::vector<RowwiseIteratorUPtr> _origin_iters;
@@ -400,7 +419,6 @@ Status VUnionIterator::init(const StorageReadOptions& opts) {
400419 if (_origin_iters.empty ()) {
401420 return Status::OK ();
402421 }
403-
404422 // We consume the iterators with back() and pop_back() of std::vector,
405423 // so reverse the vector here so that the result blocks of next_batch come
406424 // out in the same order as the original segments.
@@ -409,7 +427,6 @@ Status VUnionIterator::init(const StorageReadOptions& opts) {
409427 _read_options = opts;
410428 _cur_iter = std::move (_origin_iters.back ());
411429 RETURN_IF_ERROR (_cur_iter->init (_read_options));
412- _schema = &_cur_iter->schema ();
413430 return Status::OK ();
414431}
415432
@@ -441,19 +458,20 @@ Status VUnionIterator::current_block_row_locations(std::vector<RowLocation>* loc
441458
// Factory that wraps `inputs` in a VMergeIterator and returns it as a RowwiseIteratorUPtr.
// Every argument is forwarded to the VMergeIterator constructor; `inputs` and
// `output_schema` are moved into it, so the caller's copies are left moved-from.
// Lines marked `-` / `+` below are the removed / added sides of the change that
// introduces the trailing `output_schema` parameter.
442459RowwiseIteratorUPtr new_merge_iterator (std::vector<RowwiseIteratorUPtr>&& inputs,
443460 int sequence_id_idx, bool is_unique, bool is_reverse,
444- uint64_t * merged_rows) {
461+ uint64_t * merged_rows, SchemaSPtr output_schema ) {
445462 // When the size of inputs is 1 we still need to use VMergeIterator, because the
446463 // next_block_view function is only implemented in VMergeIterator. The size of
447464 // inputs can be 1 when the segment was filtered out by zone map or other filters.
448465 return std::make_unique<VMergeIterator>(std::move (inputs), sequence_id_idx, is_unique,
449- is_reverse, merged_rows);
466+ is_reverse, merged_rows, std::move (output_schema) );
450467}
451468
// Factory for an iterator over `inputs`.
// With exactly one input, that input iterator is returned as-is; otherwise the
// inputs are wrapped in a VUnionIterator. Lines marked `-` / `+` below are the
// removed / added sides of the change that introduces `output_schema`.
452- RowwiseIteratorUPtr new_union_iterator (std::vector<RowwiseIteratorUPtr>&& inputs) {
469+ RowwiseIteratorUPtr new_union_iterator (std::vector<RowwiseIteratorUPtr>&& inputs,
470+ SchemaSPtr output_schema) {
453471 if (inputs.size () == 1 ) {
// NOTE(review): output_schema is not used on this single-input path, so schema()
// of the returned iterator is whatever that input reports rather than
// output_schema. Confirm no caller relies on schema() matching output_schema here.
454472 return std::move (inputs[0 ]);
455473 }
// output_schema is moved into the VUnionIterator, which returns it from schema().
456- return std::make_unique<VUnionIterator>(std::move (inputs));
474+ return std::make_unique<VUnionIterator>(std::move (inputs), std::move (output_schema) );
457475}
458476
459477RowwiseIterator* new_vstatistics_iterator (std::shared_ptr<Segment> segment, const Schema& schema) {
0 commit comments