Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Commit 7a9198a

Browse files
committed
Add useless sort nodes removal.
Signed-off-by: Ilya Enkovich <[email protected]>
1 parent a45c880 commit 7a9198a

File tree

2 files changed

+97
-0
lines changed

2 files changed

+97
-0
lines changed

omniscidb/QueryOptimizer/CanonicalizeQuery.cpp

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,78 @@ bool isCompoundAggregate(const AggExpr* agg) {
2222
return agg->aggType() == AggType::kStdDevSamp || agg->aggType() == AggType::kCorr;
2323
}
2424

25+
void removeFromValidSortUsers(Node* node, std::map<NodePtr, std::set<Node*>>& users) {
26+
for (size_t input_idx = 0; input_idx < node->inputCount(); ++input_idx) {
27+
auto input = node->getAndOwnInput(input_idx);
28+
if (input->is<Sort>()) {
29+
users.at(input).erase(node);
30+
} else if (!input->is<Aggregate>()) {
31+
removeFromValidSortUsers(input.get(), users);
32+
}
33+
}
34+
}
35+
36+
/**
37+
* Find sort nodes whose result's order is going to be ignored later. It happens if the
38+
* result is later aggregated or sorted again.
39+
*/
40+
void dropDeadSorts(QueryDag& dag) {
41+
auto nodes = dag.getNodes();
42+
std::list<NodePtr> node_list(nodes.begin(), nodes.end());
43+
std::map<NodePtr, std::list<NodePtr>::iterator> sorts;
44+
std::map<NodePtr, std::set<Node*>> valid_sort_users;
45+
std::map<NodePtr, std::set<Node*>> all_sort_users;
46+
47+
// Collect sort nodes and their users.
48+
for (auto node_itr = node_list.begin(); node_itr != node_list.end(); ++node_itr) {
49+
const auto node = *node_itr;
50+
51+
// Store positions of all sort nodes to be able to remove them later from the nodes
52+
// list.
53+
if (node->is<Sort>()) {
54+
sorts[node] = node_itr;
55+
// Root node is always considered to have a valid user. We also cannot drop nodes
56+
// with limit and/or offset specified.
57+
if (dag.getRootNode() == node.get() || node->as<Sort>()->getLimit() ||
58+
node->as<Sort>()->getOffset()) {
59+
valid_sort_users[node].insert(nullptr);
60+
}
61+
}
62+
63+
for (size_t input_idx = 0; input_idx < node->inputCount(); ++input_idx) {
64+
auto input = node->getAndOwnInput(input_idx);
65+
if (input->is<Sort>()) {
66+
all_sort_users[input].insert(node.get());
67+
valid_sort_users[input].insert(node.get());
68+
}
69+
}
70+
}
71+
72+
// Find sort and aggregate nodes and remove their inputs from valid sort users.
73+
for (auto node : node_list) {
74+
if (node->is<Aggregate>() || node->is<Sort>()) {
75+
removeFromValidSortUsers(node.get(), valid_sort_users);
76+
}
77+
}
78+
79+
// Remove sorts with no valid users.
80+
for (auto& pr : valid_sort_users) {
81+
if (pr.second.empty()) {
82+
auto sort = pr.first;
83+
for (auto user : all_sort_users.at(sort)) {
84+
user->replaceInput(sort, sort->getAndOwnInput(0));
85+
}
86+
node_list.erase(sorts.at(sort));
87+
}
88+
}
89+
90+
// Any applied transformation always decreases the number of nodes.
91+
if (node_list.size() != nodes.size()) {
92+
nodes.assign(node_list.begin(), node_list.end());
93+
dag.setNodes(std::move(nodes));
94+
}
95+
}
96+
2597
/**
2698
* Base class holding interface for compound aggregate expansion.
2799
* Compound aggregate is expanded in three steps.
@@ -470,6 +542,7 @@ void addWindowFunctionPreProject(
470542
} // namespace
471543

472544
void canonicalizeQuery(QueryDag& dag) {
545+
dropDeadSorts(dag);
473546
expandCompoundAggregates(dag);
474547
addWindowFunctionPreProject(dag);
475548
}

omniscidb/Tests/PartitionedGroupByTest.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,30 @@ TEST_F(PartitionedGroupByTest, AggregationWithSort) {
173173
compare_res_data(res, id1_vals, id2_vals, id3_vals, id4_vals, v1_sums, v2_sums);
174174
}
175175

176+
TEST_F(PartitionedGroupByTest, Issue695) {
177+
auto old_exec = config().exec;
178+
ScopeGuard g([&old_exec]() { config().exec = old_exec; });
179+
180+
config().exec.group_by.default_max_groups_buffer_entry_guess = 1;
181+
config().exec.group_by.big_group_threshold = 1;
182+
config().exec.group_by.enable_cpu_partitioned_groupby = true;
183+
config().exec.group_by.partitioning_buffer_size_threshold = 10;
184+
config().exec.group_by.partitioning_group_size_threshold = 1.5;
185+
config().exec.group_by.min_partitions = 2;
186+
config().exec.group_by.max_partitions = 8;
187+
config().exec.group_by.partitioning_buffer_target_size = 612;
188+
config().exec.enable_multifrag_execution_result = true;
189+
190+
QueryBuilder builder(ctx(), getSchemaProvider(), configPtr());
191+
auto scan = builder.scan("test1");
192+
auto dag1 = scan.agg({"id1"s, "id2"s, "id3"s, "id4"s}, {"sum(v1)"s, "sum(v2)"s})
193+
.sort(0)
194+
.sort({0, 1, 2, 3})
195+
.finalize();
196+
auto res = runQuery(std::move(dag1));
197+
compare_res_data(res, id1_vals, id2_vals, id3_vals, id4_vals, v1_sums, v2_sums);
198+
}
199+
176200
int main(int argc, char* argv[]) {
177201
TestHelpers::init_logger_stderr_only(argc, argv);
178202
testing::InitGoogleTest(&argc, argv);

0 commit comments

Comments
 (0)