Skip to content

Commit a1e482c

Browse files
authored
[opt](sort) use HybridSorter to choose between timsort and pdqsort (#59207)
### What problem does this PR solve?

Timsort is faster than pdqsort for partially ordered data. Introduce a HybridSorter that samples the data at sort time to choose between timsort and pdqsort. This optimization is enabled by default; it can be disabled by setting `enable_use_hybrid_sort` to false.

```
Run on (128 X 2250.02 MHz CPU s)
CPU Caches:
  L1 Data 32 KiB (x64)
  L1 Instruction 32 KiB (x64)
  L2 Unified 1024 KiB (x64)
  L3 Unified 16384 KiB (x8)
Load Average: 36.25, 46.31, 138.22
--------------------------------------------------------------------------------
Benchmark                        Time        CPU   Iterations UserCounters...
--------------------------------------------------------------------------------
BM_PdqSort/10000000/0          350 ms     350 ms            2 items_per_second=28.5574M/s random
BM_PdqSort/10000000/1          216 ms     216 ms            3 items_per_second=46.3077M/s ascending_saw
BM_PdqSort/10000000/2          217 ms     217 ms            3 items_per_second=46.1045M/s descending_saw
BM_PdqSort/10000000/3          274 ms     274 ms            3 items_per_second=36.4636M/s generic
BM_PdqSort/10000000/4          326 ms     326 ms            2 items_per_second=30.6559M/s random_tail
BM_PdqSort/10000000/5          346 ms     346 ms            2 items_per_second=28.8922M/s random_half
BM_PdqSort/10000000/6          230 ms     230 ms            3 items_per_second=43.5162M/s wave
BM_TimSort/10000000/0         1113 ms    1113 ms            1 items_per_second=8.98417M/s random
BM_TimSort/10000000/1         89.9 ms    89.9 ms            8 items_per_second=111.209M/s ascending_saw
BM_TimSort/10000000/2         91.0 ms    91.0 ms            8 items_per_second=109.926M/s descending_saw
BM_TimSort/10000000/3          533 ms     533 ms            1 items_per_second=18.7505M/s generic
BM_TimSort/10000000/4          228 ms     228 ms            3 items_per_second=43.7805M/s random_tail
BM_TimSort/10000000/5          559 ms     559 ms            1 items_per_second=17.8817M/s random_half
BM_TimSort/10000000/6         87.4 ms    87.4 ms            8 items_per_second=114.384M/s wave
```
1 parent 9d34c04 commit a1e482c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+352
-90
lines changed

be/src/pipeline/exec/sort_sink_operator.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,9 @@ Status SortSinkLocalState::open(RuntimeState* state) {
4848
switch (p._algorithm) {
4949
case TSortAlgorithm::HEAP_SORT: {
5050
_shared_state->sorter = vectorized::HeapSorter::create_shared(
51-
_vsort_exec_exprs, p._limit, p._offset, p._pool, p._is_asc_order, p._nulls_first,
52-
p._child->row_desc(), state->get_query_ctx()->has_runtime_predicate(p._node_id));
51+
_vsort_exec_exprs, state, p._limit, p._offset, p._pool, p._is_asc_order,
52+
p._nulls_first, p._child->row_desc(),
53+
state->get_query_ctx()->has_runtime_predicate(p._node_id));
5354
break;
5455
}
5556
case TSortAlgorithm::TOPN_SORT: {

be/src/runtime/runtime_state.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -687,6 +687,11 @@ class RuntimeState {
687687
}
688688
}
689689

690+
MOCK_FUNCTION bool enable_use_hybrid_sort() const {
691+
return _query_options.__isset.enable_use_hybrid_sort &&
692+
_query_options.enable_use_hybrid_sort;
693+
}
694+
690695
void set_max_operator_id(int max_operator_id) { _max_operator_id = max_operator_id; }
691696

692697
int max_operator_id() const { return _max_operator_id; }

be/src/vec/aggregate_functions/aggregate_function_sort.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,10 @@ struct AggregateFunctionSortData {
110110
}
111111
}
112112

113-
void sort() { sort_block(block, block, sort_desc, block.rows()); }
113+
void sort() {
114+
HybridSorter hybrid_sorter;
115+
sort_block(block, block, sort_desc, hybrid_sorter, block.rows());
116+
}
114117
};
115118

116119
template <typename Data>

be/src/vec/columns/column.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include "vec/common/string_ref.h"
3737
#include "vec/common/typeid_cast.h"
3838
#include "vec/core/field.h"
39+
#include "vec/core/hybrid_sorter.h"
3940
#include "vec/core/types.h"
4041

4142
namespace doris {
@@ -491,11 +492,19 @@ class IColumn : public COW<IColumn> {
491492
* nan_direction_hint - see above.
492493
*/
493494
virtual void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
494-
Permutation& res) const {
495+
HybridSorter& sorter, Permutation& res) const {
495496
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
496497
"get_permutation for " + get_name());
497498
}
498499

500+
#ifdef BE_TEST
501+
void get_permutation_default(bool reverse, size_t limit, int nan_direction_hint,
502+
Permutation& res) const {
503+
HybridSorter sorter;
504+
get_permutation(reverse, limit, nan_direction_hint, sorter, res);
505+
}
506+
#endif
507+
499508
/** Split column to smaller columns. Each value goes to column index, selected by corresponding element of 'selector'.
500509
* Selector must contain values from 0 to num_columns - 1.
501510
* For default implementation, see column_impl.h

be/src/vec/columns/column_array.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -241,17 +241,17 @@ struct ColumnArray::less {
241241
};
242242

243243
void ColumnArray::get_permutation(bool reverse, size_t limit, int nan_direction_hint,
244-
IColumn::Permutation& res) const {
244+
HybridSorter& sorter, IColumn::Permutation& res) const {
245245
size_t s = size();
246246
res.resize(s);
247247
for (size_t i = 0; i < s; ++i) {
248248
res[i] = i;
249249
}
250250

251251
if (reverse) {
252-
pdqsort(res.begin(), res.end(), ColumnArray::less<false>(*this, nan_direction_hint));
252+
sorter.sort(res.begin(), res.end(), ColumnArray::less<false>(*this, nan_direction_hint));
253253
} else {
254-
pdqsort(res.begin(), res.end(), ColumnArray::less<true>(*this, nan_direction_hint));
254+
sorter.sort(res.begin(), res.end(), ColumnArray::less<true>(*this, nan_direction_hint));
255255
}
256256
}
257257

be/src/vec/columns/column_array.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ class ColumnArray final : public COWHelper<IColumn, ColumnArray> {
171171
size_t allocated_bytes() const override;
172172
bool has_enough_capacity(const IColumn& src) const override;
173173
void insert_many_from(const IColumn& src, size_t position, size_t length) override;
174-
void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
174+
void get_permutation(bool reverse, size_t limit, int nan_direction_hint, HybridSorter& sorter,
175175
IColumn::Permutation& res) const override;
176176
void sort_column(const ColumnSorter* sorter, EqualFlags& flags, IColumn::Permutation& perms,
177177
EqualRange& range, bool last_column) const override;

be/src/vec/columns/column_const.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ MutableColumnPtr ColumnConst::permute(const Permutation& perm, size_t limit) con
101101
}
102102

103103
void ColumnConst::get_permutation(bool /*reverse*/, size_t /*limit*/, int /*nan_direction_hint*/,
104-
Permutation& res) const {
104+
HybridSorter& /*sorter*/, Permutation& res) const {
105105
res.resize(s);
106106
for (size_t i = 0; i < s; ++i) {
107107
res[i] = i;

be/src/vec/columns/column_const.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ class ColumnConst final : public COWHelper<IColumn, ColumnConst> {
225225

226226
MutableColumnPtr permute(const Permutation& perm, size_t limit) const override;
227227
// ColumnPtr index(const IColumn & indexes, size_t limit) const override;
228-
void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
228+
void get_permutation(bool reverse, size_t limit, int nan_direction_hint, HybridSorter& sorter,
229229
Permutation& res) const override;
230230

231231
size_t byte_size() const override { return s > 0 ? data->byte_size() + sizeof(s) : 0; }

be/src/vec/columns/column_decimal.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -286,18 +286,16 @@ Field ColumnDecimal<T>::operator[](size_t n) const {
286286
}
287287

288288
template <PrimitiveType T>
289-
void ColumnDecimal<T>::get_permutation(bool reverse, size_t limit, int,
289+
void ColumnDecimal<T>::get_permutation(bool reverse, size_t limit, int, HybridSorter& sorter,
290290
IColumn::Permutation& res) const {
291-
#if 1 /// TODO: perf test
292291
if (data.size() <= std::numeric_limits<UInt32>::max()) {
293292
PaddedPODArray<UInt32> tmp_res;
294-
permutation(reverse, limit, tmp_res);
293+
permutation(reverse, limit, sorter, tmp_res);
295294

296295
res.resize(tmp_res.size());
297296
for (size_t i = 0; i < tmp_res.size(); ++i) res[i] = tmp_res[i];
298297
return;
299298
}
300-
#endif
301299
}
302300

303301
template <PrimitiveType T>

be/src/vec/columns/column_decimal.h

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ class ColumnDecimal final : public COWHelper<IColumn, ColumnDecimal<T>> {
185185
const uint8_t* __restrict null_data) const override;
186186

187187
int compare_at(size_t n, size_t m, const IColumn& rhs_, int nan_direction_hint) const override;
188-
void get_permutation(bool reverse, size_t limit, int nan_direction_hint,
188+
void get_permutation(bool reverse, size_t limit, int nan_direction_hint, HybridSorter& sorter,
189189
IColumn::Permutation& res) const override;
190190

191191
MutableColumnPtr clone_resized(size_t size) const override;
@@ -264,7 +264,8 @@ class ColumnDecimal final : public COWHelper<IColumn, ColumnDecimal<T>> {
264264
Container data;
265265
UInt32 scale;
266266
template <typename U>
267-
void permutation(bool reverse, size_t limit, PaddedPODArray<U>& res) const {
267+
void permutation(bool reverse, size_t limit, HybridSorter& sorter,
268+
PaddedPODArray<U>& res) const {
268269
size_t s = data.size();
269270
res.resize(s);
270271
for (U i = 0; i < s; ++i) res[i] = i;
@@ -280,11 +281,11 @@ class ColumnDecimal final : public COWHelper<IColumn, ColumnDecimal<T>> {
280281
[this](size_t a, size_t b) { return data[a] < data[b]; });
281282
} else {
282283
if (reverse)
283-
pdqsort(res.begin(), res.end(),
284-
[this](size_t a, size_t b) { return data[a] > data[b]; });
284+
sorter.sort(res.begin(), res.end(),
285+
[this](size_t a, size_t b) { return data[a] > data[b]; });
285286
else
286-
pdqsort(res.begin(), res.end(),
287-
[this](size_t a, size_t b) { return data[a] < data[b]; });
287+
sorter.sort(res.begin(), res.end(),
288+
[this](size_t a, size_t b) { return data[a] < data[b]; });
288289
}
289290
}
290291

0 commit comments

Comments
 (0)