Skip to content

Commit b71aa7c

Browse files
ti-chi-botwindtalkerJaySon-Huang
authored
Fix spill crash (#10004) (#10014)
close #9999 Fix sort spill crash in some corner cases. Signed-off-by: xufei <[email protected]> Co-authored-by: xufei <[email protected]> Co-authored-by: xufei <[email protected]> Co-authored-by: JaySon <[email protected]>
1 parent 9ff8d76 commit b71aa7c

File tree

4 files changed

+71
-4
lines changed

4 files changed

+71
-4
lines changed

dbms/src/DataStreams/FilterTransformAction.cpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -102,16 +102,18 @@ bool FilterTransformAction::transform(Block & block, FilterPtr & res_filter, boo
102102
* and now - are calculated. That is, not all cases are covered by the code above.
103103
* This happens if the function returns a constant for a non-constant argument.
104104
* For example, `ignore` function.
105+
* use a local variable to avoid the case that function return a constant for a non-constant argument in one block
106+
* but return a non-constant for the same argument in another block.
105107
*/
106-
constant_filter_description = ConstantFilterDescription(*column_of_filter);
108+
auto current_constant_filter_description = ConstantFilterDescription(*column_of_filter);
107109

108-
if (constant_filter_description.always_false)
110+
if (current_constant_filter_description.always_false)
109111
{
110112
block.clear();
111113
return true;
112114
}
113115

114-
if (constant_filter_description.always_true)
116+
if (current_constant_filter_description.always_true)
115117
{
116118
if (return_filter)
117119
res_filter = nullptr;
@@ -140,7 +142,7 @@ bool FilterTransformAction::transform(Block & block, FilterPtr & res_filter, boo
140142
if (filtered_rows == rows)
141143
{
142144
/// Replace the column with the filter by a constant.
143-
auto filter_column = block.safeGetByPosition(filter_column_position);
145+
auto & filter_column = block.safeGetByPosition(filter_column_position);
144146
filter_column.column = filter_column.type->createColumnConst(filtered_rows, static_cast<UInt64>(1));
145147
/// No need to touch the rest of the columns.
146148
return true;

dbms/src/DataStreams/MergeSortingBlockInputStream.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,11 @@ Block MergeSortingBlockInputStream::readImpl()
8888
return block;
8989

9090
SortHelper::removeConstantsFromBlock(block);
91+
RUNTIME_CHECK_MSG(
92+
block.columns() == header_without_constants.columns(),
93+
"Unexpected number of constant columns in block in MergeSortingBlockInputStream, n_block={}, n_head={}",
94+
block.columns(),
95+
header_without_constants.columns());
9196

9297
blocks.push_back(block);
9398
sum_bytes_in_blocks += block.estimateBytesForSpill();

dbms/src/Flash/tests/gtest_spill_sort.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,12 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
#include <Functions/FunctionHelpers.h>
1516
#include <Interpreters/Context.h>
1617
#include <TestUtils/ColumnGenerator.h>
1718
#include <TestUtils/ExecutorTestUtils.h>
1819
#include <TestUtils/mockExecutor.h>
20+
#include <common/types.h>
1921

2022
namespace DB
2123
{
@@ -166,5 +168,58 @@ try
166168
}
167169
CATCH
168170

171+
TEST_F(SpillSortTestRunner, SpillAfterFilter)
172+
try
173+
{
174+
DB::MockColumnInfoVec column_infos{
175+
{"a", TiDB::TP::TypeTiny},
176+
{"b", TiDB::TP::TypeTiny},
177+
{"c", TiDB::TP::TypeTiny},
178+
{"d", TiDB::TP::TypeTiny},
179+
{"e", TiDB::TP::TypeTiny}};
180+
ColumnsWithTypeAndName column_data;
181+
size_t table_rows = 102400;
182+
UInt64 max_block_size = 64;
183+
size_t total_data_size = 0;
184+
size_t limit_size = table_rows / 10 * 9;
185+
for (const auto & column_info : mockColumnInfosToTiDBColumnInfos(column_infos))
186+
{
187+
ColumnGeneratorOpts opts{
188+
table_rows,
189+
getDataTypeByColumnInfoForComputingLayer(column_info)->getName(),
190+
RANDOM,
191+
column_info.name};
192+
column_data.push_back(ColumnGenerator::instance().generate(opts));
193+
total_data_size += column_data.back().column->byteSize();
194+
}
195+
context.addMockTable("spill_sort_test", "simple_table", column_infos, column_data, 8);
196+
197+
MockOrderByItemVec order_by_items{
198+
std::make_pair("a", true),
199+
std::make_pair("b", true),
200+
std::make_pair("c", true),
201+
std::make_pair("d", true),
202+
std::make_pair("e", true)};
203+
204+
auto request = context.scan("spill_sort_test", "simple_table")
205+
.filter(gt(col("d"), lit(toField(static_cast<Int8>(-128)))))
206+
.topN(order_by_items, limit_size)
207+
.project({gt(col("d"), lit(toField(static_cast<Int8>(-128))))})
208+
.build(context);
209+
context.context->setSetting("max_block_size", Field(static_cast<UInt64>(max_block_size)));
210+
211+
/// disable spill
212+
context.context->setSetting("max_bytes_before_external_sort", Field(static_cast<UInt64>(0)));
213+
auto ref_columns = executeStreams(request, 1);
214+
215+
// The implementation of topN in the pipeline model is LocalSort, and the result of using multiple threads is unstable. Therefore, a single thread is used here instead.
216+
enablePipeline(true);
217+
context.context->setSetting("max_bytes_before_external_sort", Field(static_cast<UInt64>(total_data_size / 10)));
218+
ASSERT_COLUMNS_EQ_R(ref_columns, executeStreams(request, 1));
219+
context.context->setSetting("max_cached_data_bytes_in_spiller", Field(static_cast<UInt64>(total_data_size / 100)));
220+
ASSERT_COLUMNS_EQ_R(ref_columns, executeStreams(request, 1));
221+
}
222+
CATCH
223+
169224
} // namespace tests
170225
} // namespace DB

dbms/src/Operators/MergeSortTransformOp.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,11 @@ OperatorStatus MergeSortTransformOp::transformImpl(Block & block)
149149

150150
// store the sorted block in `sorted_blocks`.
151151
SortHelper::removeConstantsFromBlock(block);
152+
RUNTIME_CHECK_MSG(
153+
block.columns() == header_without_constants.columns(),
154+
"Unexpected number of constant columns in block in MergeSortTransformOp, n_block={}, n_head={}",
155+
block.columns(),
156+
header_without_constants.columns());
152157
sum_bytes_in_blocks += block.estimateBytesForSpill();
153158
sorted_blocks.emplace_back(std::move(block));
154159

0 commit comments

Comments
 (0)