Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Commit 590bce6

Browse files
committed
Support UNNEST in projections.
Signed-off-by: ienkovich <[email protected]>
1 parent a8074d6 commit 590bce6

14 files changed

+439
-39
lines changed

omniscidb/IR/Node.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
66
*/
77

88
#include "Node.h"
9+
#include "ExprCollector.h"
910
#include "InputRewriter.h"
11+
#include "UnnestDetector.h"
1012

1113
namespace hdk::ir {
1214

@@ -128,6 +130,15 @@ bool Project::hasWindowFunctionExpr() const {
128130
return false;
129131
}
130132

133+
// Reports whether this projection contains an UNNEST operation: returns true
// as soon as UnnestDetector::collect() yields a true result for one of the
// expressions stored in exprs_, and false when none of them does.
bool Project::hasUnnestExpr() const {
134+
for (auto& expr : exprs_) {
135+
if (UnnestDetector::collect(expr.get())) {
136+
// A single match is sufficient; the remaining expressions are not scanned.
return true;
137+
}
138+
}
139+
return false;
140+
}
141+
131142
bool Project::isIdentity() const {
132143
if (!isSimple()) {
133144
return false;

omniscidb/IR/Node.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,7 @@ class Project : public Node {
337337
void appendInput(std::string new_field_name, ExprPtr expr);
338338

339339
bool hasWindowFunctionExpr() const;
340+
bool hasUnnestExpr() const;
340341

341342
std::string toString() const override {
342343
return cat(::typeName(this),

omniscidb/IR/UnnestDetector.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/**
2+
* Copyright (C) 2023 Intel Corporation
3+
*
4+
* SPDX-License-Identifier: Apache-2.0
5+
*/
6+
7+
#pragma once
8+
9+
#include "ExprCollector.h"
10+
11+
namespace hdk::ir {
12+
13+
// Expression collector with a boolean result that becomes true when a unary
// operator with OpType::kUnnest is visited.
class UnnestDetector : public ExprCollector<bool, UnnestDetector> {
14+
public:
15+
// The result starts out as false; only visitUOper() below sets it to true.
UnnestDetector() { result_ = false; }
16+
17+
protected:
18+
// For an UNNEST operator: record the hit and return without calling the base
// visitor. Any other unary operator is forwarded to BaseClass::visitUOper().
// NOTE(review): presumably the base visitor continues the traversal into the
// operand -- confirm against ExprCollector.h.
void visitUOper(const hdk::ir::UOper* uoper) override {
19+
if (uoper->opType() == OpType::kUnnest) {
20+
result_ = true;
21+
return;
22+
}
23+
BaseClass::visitUOper(uoper);
24+
}
25+
};
26+
27+
} // namespace hdk::ir

omniscidb/QueryEngine/Execute.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
#include "QueryEngine/RuntimeFunctions.h"
7070
#include "QueryEngine/SpeculativeTopN.h"
7171
#include "QueryEngine/StringDictionaryGenerations.h"
72+
#include "QueryEngine/UnnestedVarsCollector.h"
7273
#include "QueryEngine/Visitors/TransientStringLiteralsVisitor.h"
7374
#include "ResultSet/ColRangeInfo.h"
7475
#include "Shared/checked_alloc.h"
@@ -1759,10 +1760,12 @@ hdk::ResultSetTable Executor::executeWorkUnit(
17591760
}
17601761
};
17611762

1763+
bool has_proj_unnest =
1764+
!UnnestedVarsCollector::collect(ra_exe_unit_in.target_exprs).empty();
17621765
try {
17631766
auto result = executeWorkUnitImpl(max_groups_buffer_entry_guess,
17641767
is_agg,
1765-
true,
1768+
!has_proj_unnest,
17661769
query_infos,
17671770
ra_exe_unit_in,
17681771
co,

omniscidb/QueryEngine/Execute.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -825,7 +825,8 @@ class Executor : public StringDictionaryProxyProvider {
825825
llvm::Value* arrayLoopCodegen(const hdk::ir::Expr* array_expr,
826826
std::stack<llvm::BasicBlock*>& array_loops,
827827
DiamondCodegen& diamond_codegen,
828-
const CompilationOptions& co);
828+
const CompilationOptions& co,
829+
llvm::Value* array_size = nullptr);
829830

830831
llvm::Value* castToFP(llvm::Value*,
831832
const hdk::ir::Type* from_type,

omniscidb/QueryEngine/IRCodegen.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1226,7 +1226,8 @@ Executor::GroupColLLVMValue Executor::groupByColumnCodegen(
12261226
llvm::Value* Executor::arrayLoopCodegen(const hdk::ir::Expr* array_expr,
12271227
std::stack<llvm::BasicBlock*>& array_loops,
12281228
DiamondCodegen& diamond_codegen,
1229-
const CompilationOptions& co) {
1229+
const CompilationOptions& co,
1230+
llvm::Value* array_size) {
12301231
AUTOMATIC_IR_METADATA(cgen_state_.get());
12311232
CodeGenerator code_generator(this, co.codegen_traits_desc);
12321233
auto array_lv = code_generator.codegen(array_expr, true, co).front();
@@ -1257,7 +1258,8 @@ llvm::Value* Executor::arrayLoopCodegen(const hdk::ir::Expr* array_expr,
12571258
CHECK(array_type->isArray());
12581259
auto elem_type = array_type->as<hdk::ir::ArrayBaseType>()->elemType();
12591260
auto array_len =
1260-
(array_type->size() > 0)
1261+
array_size ? array_size
1262+
: (array_type->size() > 0)
12611263
? cgen_state_->llInt(array_type->size() / elem_type->size())
12621264
: cgen_state_->emitExternalCall(
12631265
"array_size",

omniscidb/QueryEngine/QueryExecutionSequence.cpp

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
#include "QueryExecutionSequence.h"
88
#include "ScalarExprVisitor.h"
99

10+
#include "IR/UnnestDetector.h"
11+
1012
#include <boost/graph/adjacency_list.hpp>
1113
#include <boost/graph/topological_sort.hpp>
1214

@@ -115,6 +117,40 @@ class QueryExecutionSequenceImpl {
115117
execution_points_.insert(node->getInput(0));
116118
}
117119

120+
// Similar limitation applies to the UNNEST operation. We support
121+
// only column references as an unnest operand in projections and
122+
// therefore have to execute its input. Also, we don't have any
123+
// special unnesting expression in the execution unit and rely on
124+
// target expression. So, subsequent nodes that drop unnested arrays
125+
// can completely remove unnesting from execution units, so all
126+
// projections with UNNEST operation should be executed directly.
127+
// We also support UNNEST operation in aggregation keys. So, allow
128+
// to merge a projection with appropriate aggregate node.
129+
// TODO: add unnest_exprs to execution unit to enable merge with
130+
// other nodes.
131+
if (node->is<ir::Project>() && node->as<ir::Project>()->hasUnnestExpr()) {
132+
execution_points_.insert(node->getInput(0));
133+
bool can_be_merged = false;
134+
auto [start, end] = boost::in_edges(node_to_vertex_[node], graph_);
135+
if (start != end) {
136+
auto parent_node = graph_[start->m_source];
137+
if (parent_node->is<ir::Aggregate>()) {
138+
size_t keys_count = parent_node->as<ir::Aggregate>()->getGroupByCount();
139+
auto proj = node->as<ir::Project>();
140+
can_be_merged = true;
141+
for (size_t i = keys_count; i < proj->size(); ++i) {
142+
if (ir::UnnestDetector::collect(proj->getExpr(i))) {
143+
can_be_merged = false;
144+
break;
145+
}
146+
}
147+
}
148+
}
149+
if (!can_be_merged) {
150+
execution_points_.insert(node);
151+
}
152+
}
153+
118154
// Currently, we cannot merge union code into any other execution
119155
// module. Therefore, mark it and all its inputs as execution points.
120156
// TODO: UNION ALL should be able to be merged into other execution
@@ -175,7 +211,8 @@ class QueryExecutionSequenceImpl {
175211
CHECK(start != end);
176212
auto node = graph_[start->m_source];
177213

178-
if (node->is<ir::Project>() && !node->as<ir::Project>()->hasWindowFunctionExpr()) {
214+
if (node->is<ir::Project>() && !node->as<ir::Project>()->hasWindowFunctionExpr() &&
215+
!node->as<ir::Project>()->hasUnnestExpr()) {
179216
// In case of aggregation we allow only 'simple' projections which
180217
// don't have complex expressions referencing aggregate exprs.
181218
bool is_simple = true;

omniscidb/QueryEngine/RelAlgExecutor.cpp

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "RelAlgExecutor.h"
1818
#include "DataMgr/DataMgr.h"
1919
#include "IR/TypeUtils.h"
20+
#include "QueryBuilder/QueryBuilder.h"
2021
#include "QueryEngine/CalciteDeserializerUtils.h"
2122
#include "QueryEngine/CardinalityEstimator.h"
2223
#include "QueryEngine/ColumnFetcher.h"
@@ -35,6 +36,7 @@
3536
#include "QueryEngine/RelAlgVisitor.h"
3637
#include "QueryEngine/ResultSetBuilder.h"
3738
#include "QueryEngine/ResultSetSort.h"
39+
#include "QueryEngine/UnnestedVarsCollector.h"
3840
#include "QueryEngine/WindowContext.h"
3941
#include "QueryEngine/WorkUnitBuilder.h"
4042
#include "QueryOptimizer/CanonicalizeQuery.h"
@@ -1505,14 +1507,27 @@ std::optional<size_t> RelAlgExecutor::getFilteredCountAll(const WorkUnit& work_u
15051507
const bool is_agg,
15061508
const CompilationOptions& co,
15071509
const ExecutionOptions& eo) {
1508-
const auto count = hdk::ir::makeExpr<hdk::ir::AggExpr>(
1509-
hdk::ir::Context::defaultCtx().integer(config_.exec.group_by.bigint_count ? 8 : 4),
1510-
hdk::ir::AggType::kCount,
1511-
nullptr,
1512-
false,
1513-
nullptr);
1510+
auto unnested_vars = UnnestedVarsCollector::collect(work_unit.exe_unit.target_exprs);
1511+
hdk::ir::QueryBuilder builder(
1512+
hdk::ir::Context::defaultCtx(), schema_provider_, executor_->getConfigPtr());
1513+
hdk::ir::BuilderExpr count_all_agg;
1514+
if (!unnested_vars.empty()) {
1515+
hdk::ir::BuilderExpr total_count;
1516+
for (auto var : unnested_vars) {
1517+
hdk::ir::BuilderExpr var_expr(&builder, var->shared());
1518+
if (!total_count.expr()) {
1519+
total_count = var_expr.cardinality();
1520+
} else {
1521+
total_count = total_count.mul(var_expr.cardinality());
1522+
}
1523+
}
1524+
count_all_agg = total_count.sum();
1525+
} else {
1526+
count_all_agg = builder.count();
1527+
}
1528+
15141529
const auto count_all_exe_unit =
1515-
create_count_all_execution_unit(work_unit.exe_unit, count);
1530+
create_count_all_execution_unit(work_unit.exe_unit, count_all_agg.expr());
15161531
size_t one{1};
15171532
hdk::ResultSetTable count_all_result;
15181533
try {

0 commit comments

Comments
 (0)