Skip to content
This repository was archived by the owner on May 9, 2024. It is now read-only.

Commit 93c9f44

Browse files
committed
Add quantile aggregate.
Signed-off-by: ienkovich <[email protected]>
1 parent 281beeb commit 93c9f44

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+1466
-96
lines changed

omniscidb/IR/Expr.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -533,7 +533,8 @@ ExprPtr AggExpr::withType(const Type* new_type) const {
533533
if (type_->equal(new_type)) {
534534
return shared_from_this();
535535
}
536-
return makeExpr<AggExpr>(new_type, agg_type_, arg_, is_distinct_, arg1_);
536+
return makeExpr<AggExpr>(
537+
new_type, agg_type_, arg_, is_distinct_, arg1_, interpolation_);
537538
}
538539

539540
ExprPtr CaseExpr::withType(const Type* new_type) const {
@@ -1193,7 +1194,8 @@ bool AggExpr::operator==(const Expr& rhs) const {
11931194
return false;
11941195
}
11951196
const AggExpr& rhs_ae = dynamic_cast<const AggExpr&>(rhs);
1196-
if (agg_type_ != rhs_ae.aggType() || is_distinct_ != rhs_ae.isDistinct()) {
1197+
if (agg_type_ != rhs_ae.aggType() || is_distinct_ != rhs_ae.isDistinct() ||
1198+
interpolation_ != rhs_ae.interpolation_) {
11971199
return false;
11981200
}
11991201
if (arg_.get() == rhs_ae.arg()) {
@@ -1598,6 +1600,9 @@ std::string AggExpr::toString() const {
15981600
if (arg1_) {
15991601
ss << arg1_->toString();
16001602
}
1603+
if (agg_type_ == AggType::kQuantile) {
1604+
ss << " " << interpolation_;
1605+
}
16011606
ss << ")";
16021607
return ss.str();
16031608
}
@@ -2059,6 +2064,9 @@ size_t AggExpr::hash() const {
20592064
if (arg1_) {
20602065
boost::hash_combine(*hash_, arg1_->hash());
20612066
}
2067+
if (agg_type_ == AggType::kQuantile) {
2068+
boost::hash_combine(*hash_, static_cast<int>(interpolation_));
2069+
}
20622070
}
20632071
return *hash_;
20642072
}

omniscidb/IR/Expr.h

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -787,11 +787,22 @@ class LikelihoodExpr : public Expr {
787787
*/
788788
class AggExpr : public Expr {
789789
public:
790-
AggExpr(const Type* type, AggType a, ExprPtr arg, bool d, ExprPtr arg1)
791-
: Expr(type, true), agg_type_(a), arg_(arg), is_distinct_(d), arg1_(arg1) {
790+
AggExpr(const Type* type,
791+
AggType a,
792+
ExprPtr arg,
793+
bool d,
794+
ExprPtr arg1,
795+
Interpolation interpolation = Interpolation::kLinear)
796+
: Expr(type, true)
797+
, agg_type_(a)
798+
, arg_(arg)
799+
, is_distinct_(d)
800+
, arg1_(arg1)
801+
, interpolation_(interpolation) {
792802
if (arg1) {
793803
if (agg_type_ == AggType::kApproxCountDistinct ||
794-
agg_type_ == AggType::kApproxQuantile || agg_type_ == AggType::kTopK) {
804+
agg_type_ == AggType::kApproxQuantile || agg_type_ == AggType::kTopK ||
805+
agg_type_ == AggType::kQuantile) {
795806
CHECK(arg1_->is<Constant>());
796807
} else {
797808
CHECK(agg_type_ == AggType::kCorr);
@@ -804,6 +815,7 @@ class AggExpr : public Expr {
804815
bool isDistinct() const { return is_distinct_; }
805816
const Expr* arg1() const { return arg1_.get(); }
806817
ExprPtr arg1Shared() const { return arg1_; }
818+
Interpolation interpolation() const { return interpolation_; }
807819
ExprPtr withType(const Type* new_type) const override;
808820
bool operator==(const Expr& rhs) const override;
809821
std::string toString() const override;
@@ -817,6 +829,8 @@ class AggExpr : public Expr {
817829
// APPROX_COUNT_DISTINCT error_rate, APPROX_QUANTILE quantile,
818830
// CORR second arg
819831
ExprPtr arg1_;
832+
// QUANTILE interpolation
833+
Interpolation interpolation_;
820834
};
821835

822836
/*

omniscidb/IR/ExprRewriter.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -320,8 +320,12 @@ class ExprRewriter : public ExprVisitor<ExprPtr> {
320320
ExprPtr new_arg = agg->arg() ? visit(agg->arg()) : nullptr;
321321
ExprPtr new_arg1 = agg->arg1() ? visit(agg->arg1()) : nullptr;
322322
if (new_arg.get() != agg->arg() || new_arg1.get() != agg->arg1()) {
323-
return hdk::ir::makeExpr<hdk::ir::AggExpr>(
324-
agg->type(), agg->aggType(), new_arg, agg->isDistinct(), new_arg1);
323+
return hdk::ir::makeExpr<hdk::ir::AggExpr>(agg->type(),
324+
agg->aggType(),
325+
new_arg,
326+
agg->isDistinct(),
327+
new_arg1,
328+
agg->interpolation());
325329
}
326330
return defaultResult(agg);
327331
}

omniscidb/IR/OpType.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,8 @@ inline std::string toString(hdk::ir::AggType agg) {
105105
return "SINGLE_VALUE";
106106
case hdk::ir::AggType::kTopK:
107107
return "TOP_K";
108+
case hdk::ir::AggType::kQuantile:
109+
return "QUANTILE";
108110
case hdk::ir::AggType::kStdDevSamp:
109111
return "STDDEV";
110112
case hdk::ir::AggType::kCorr:
@@ -153,6 +155,23 @@ inline std::string toString(hdk::ir::WindowFunctionKind kind) {
153155
return "";
154156
}
155157

158+
inline std::string toString(hdk::ir::Interpolation interpolation) {
159+
switch (interpolation) {
160+
case hdk::ir::Interpolation::kLower:
161+
return "LOWER";
162+
case hdk::ir::Interpolation::kHigher:
163+
return "HIGHER";
164+
case hdk::ir::Interpolation::kNearest:
165+
return "NEAREST";
166+
case hdk::ir::Interpolation::kMidpoint:
167+
return "MIDPOINT";
168+
case hdk::ir::Interpolation::kLinear:
169+
return "LINEAR";
170+
}
171+
LOG(FATAL) << "Invalid interpolation kind " << (int)interpolation;
172+
return "";
173+
}
174+
156175
namespace hdk::ir {
157176

158177
inline std::ostream& operator<<(std::ostream& os, hdk::ir::OpType op) {
@@ -171,4 +190,8 @@ inline std::ostream& operator<<(std::ostream& os, hdk::ir::WindowFunctionKind ki
171190
return os << toString(kind);
172191
}
173192

193+
inline std::ostream& operator<<(std::ostream& os, hdk::ir::Interpolation interpolation) {
194+
return os << toString(interpolation);
195+
}
196+
174197
} // namespace hdk::ir

omniscidb/IR/OpTypeEnums.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ enum class AggType {
8686
kSample,
8787
kSingleValue,
8888
kTopK,
89+
kQuantile,
8990
// Compound aggregates
9091
kStdDevSamp,
9192
kCorr,
@@ -110,4 +111,6 @@ enum class WindowFunctionKind {
110111
SumInternal // For deserialization from Calcite only. Gets rewritten to a regular SUM.
111112
};
112113

114+
enum class Interpolation { kLower, kHigher, kNearest, kMidpoint, kLinear };
115+
113116
} // namespace hdk::ir

omniscidb/QueryBuilder/QueryBuilder.cpp

Lines changed: 47 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -558,7 +558,7 @@ BuilderExpr BuilderExpr::approxCountDist() const {
558558

559559
BuilderExpr BuilderExpr::approxQuantile(double val) const {
560560
if (!expr_->type()->isNumber()) {
561-
throw InvalidQueryError() << "Unsupported type for sum aggregate: "
561+
throw InvalidQueryError() << "Unsupported type for ApproxQuantile aggregate: "
562562
<< expr_->type()->toString();
563563
}
564564
if (val < 0.0 || val > 1.0) {
@@ -613,6 +613,33 @@ BuilderExpr BuilderExpr::bottomK(int count) const {
613613
return topK(-count);
614614
}
615615

616+
BuilderExpr BuilderExpr::quantile(double val, Interpolation interpolation) const {
617+
if (!expr_->type()->isNumber() && !expr_->type()->isDateTime() &&
618+
!expr_->type()->isInterval()) {
619+
throw InvalidQueryError() << "Unsupported type for quantile aggregate: "
620+
<< expr_->type()->toString();
621+
}
622+
if (val < 0.0 || val > 1.0) {
623+
throw InvalidQueryError() << "Quantile expects argument between 0.0 and 1.0 but got "
624+
<< val;
625+
}
626+
Datum d;
627+
d.doubleval = val;
628+
auto cst = makeExpr<Constant>(builder_->ctx_.fp64(), false, d);
629+
auto res_type = expr_->type();
630+
if (interpolation == Interpolation::kMidpoint ||
631+
interpolation == Interpolation::kLinear) {
632+
if (res_type->isInteger()) {
633+
res_type = builder_->ctx_.fp64();
634+
}
635+
}
636+
res_type = res_type->canonicalize();
637+
auto agg =
638+
makeExpr<AggExpr>(res_type, AggType::kQuantile, expr_, false, cst, interpolation);
639+
auto name = name_.empty() ? "quantile" : name_ + "_quantile";
640+
return {builder_, agg, name, true};
641+
}
642+
616643
BuilderExpr BuilderExpr::stdDev() const {
617644
if (!expr_->type()->isNumber()) {
618645
throw InvalidQueryError() << "Non-numeric type " << expr_->type()->toString()
@@ -704,6 +731,7 @@ BuilderExpr BuilderExpr::agg(const std::string& agg_str, BuilderExpr arg) const
704731
{"top_k", AggType::kTopK},
705732
{"bottomk", AggType::kTopK},
706733
{"bottom_k", AggType::kTopK},
734+
{"quantile", AggType::kQuantile},
707735
{"stddev", AggType::kStdDevSamp},
708736
{"stddev_samp", AggType::kStdDevSamp},
709737
{"stddev samp", AggType::kStdDevSamp},
@@ -716,8 +744,8 @@ BuilderExpr BuilderExpr::agg(const std::string& agg_str, BuilderExpr arg) const
716744
}
717745

718746
auto kind = agg_names.at(agg_str_lower);
719-
if (kind == AggType::kApproxQuantile && !arg.expr()) {
720-
throw InvalidQueryError("Missing argument for approximate quantile aggregate.");
747+
if ((kind == AggType::kApproxQuantile || kind == AggType::kQuantile) && !arg.expr()) {
748+
throw InvalidQueryError("Missing argument for quantile aggregate.");
721749
}
722750
if (kind == AggType::kTopK) {
723751
if (!arg.expr()) {
@@ -751,8 +779,10 @@ BuilderExpr BuilderExpr::agg(AggType agg_kind, const BuilderExpr& arg) const {
751779
return agg(agg_kind, false, arg);
752780
}
753781

754-
BuilderExpr BuilderExpr::agg(AggType agg_kind, double val) const {
755-
return agg(agg_kind, false, val);
782+
BuilderExpr BuilderExpr::agg(AggType agg_kind,
783+
double val,
784+
Interpolation interpolation) const {
785+
return agg(agg_kind, false, val, interpolation);
756786
}
757787

758788
BuilderExpr BuilderExpr::agg(AggType agg_kind, int val) const {
@@ -761,21 +791,21 @@ BuilderExpr BuilderExpr::agg(AggType agg_kind, int val) const {
761791

762792
BuilderExpr BuilderExpr::agg(AggType agg_kind,
763793
bool is_distinct,
764-
const BuilderExpr& arg) const {
794+
const BuilderExpr& arg,
795+
Interpolation interpolation) const {
765796
if (is_distinct && agg_kind != AggType::kCount) {
766797
throw InvalidQueryError() << "Distinct property cannot be set to true for "
767798
<< agg_kind << " aggregate.";
768799
}
769800
if (arg.expr() && agg_kind != AggType::kApproxQuantile && agg_kind != AggType::kCorr &&
770-
agg_kind != AggType::kTopK) {
801+
agg_kind != AggType::kTopK && agg_kind != AggType::kQuantile) {
771802
throw InvalidQueryError() << "Aggregate argument is supported for approximate "
772803
"quantile and corr only but provided for "
773804
<< agg_kind;
774805
}
775-
if (agg_kind == AggType::kApproxQuantile) {
806+
if (agg_kind == AggType::kApproxQuantile || agg_kind == AggType::kQuantile) {
776807
if (!arg.expr()->is<Constant>() || !arg.type()->isFloatingPoint()) {
777-
throw InvalidQueryError() << "Expected fp constant argumnt for approximate "
778-
"quantile. Provided: "
808+
throw InvalidQueryError() << "Expected fp constant argumnt for quantile. Provided: "
779809
<< arg.expr()->toString();
780810
}
781811
}
@@ -808,6 +838,8 @@ BuilderExpr BuilderExpr::agg(AggType agg_kind,
808838
return singleValue();
809839
case AggType::kTopK:
810840
return topK(arg.expr()->as<Constant>()->intVal());
841+
case AggType::kQuantile:
842+
return quantile(arg.expr()->as<Constant>()->fpVal(), interpolation);
811843
case AggType::kStdDevSamp:
812844
return stdDev();
813845
case AggType::kCorr:
@@ -818,12 +850,15 @@ BuilderExpr BuilderExpr::agg(AggType agg_kind,
818850
throw InvalidQueryError() << "Unsupported aggregate type: " << agg_kind;
819851
}
820852

821-
BuilderExpr BuilderExpr::agg(AggType agg_kind, bool is_distinct, double val) const {
853+
BuilderExpr BuilderExpr::agg(AggType agg_kind,
854+
bool is_distinct,
855+
double val,
856+
Interpolation interpolation) const {
822857
BuilderExpr arg;
823858
if (val != HUGE_VAL) {
824859
arg = builder_->cst(val);
825860
}
826-
return agg(agg_kind, is_distinct, arg);
861+
return agg(agg_kind, is_distinct, arg, interpolation);
827862
}
828863

829864
BuilderExpr BuilderExpr::extract(DateExtractField field) const {

omniscidb/QueryBuilder/QueryBuilder.h

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,8 @@ class BuilderExpr {
7878
BuilderExpr singleValue() const;
7979
BuilderExpr topK(int count) const;
8080
BuilderExpr bottomK(int count) const;
81+
BuilderExpr quantile(double val,
82+
Interpolation interpolation = Interpolation::kLinear) const;
8183
BuilderExpr stdDev() const;
8284
BuilderExpr corr(const BuilderExpr& arg) const;
8385

@@ -90,12 +92,18 @@ class BuilderExpr {
9092
BuilderExpr agg(const std::string& agg_str, double val = HUGE_VAL) const;
9193
BuilderExpr agg(const std::string& agg_str, int val) const;
9294
BuilderExpr agg(AggType agg_kind, const BuilderExpr& arg) const;
93-
BuilderExpr agg(AggType agg_kind, double val) const;
95+
BuilderExpr agg(AggType agg_kind,
96+
double val,
97+
Interpolation interpolation = Interpolation::kLinear) const;
9498
BuilderExpr agg(AggType agg_kind, int val) const;
95-
BuilderExpr agg(AggType agg_kind, bool is_dinstinct, const BuilderExpr& arg) const;
99+
BuilderExpr agg(AggType agg_kind,
100+
bool is_dinstinct,
101+
const BuilderExpr& arg,
102+
Interpolation interpolation = Interpolation::kLinear) const;
96103
BuilderExpr agg(AggType agg_kind,
97104
bool is_dinstinct = false,
98-
double val = HUGE_VAL) const;
105+
double val = HUGE_VAL,
106+
Interpolation interpolation = Interpolation::kLinear) const;
99107

100108
BuilderExpr extract(DateExtractField field) const;
101109
BuilderExpr extract(const std::string& field) const;

omniscidb/QueryEngine/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ set(hdk_default_runtime_functions_module_dependencies
182182
DecodersImpl.h
183183
${CMAKE_CURRENT_SOURCE_DIR}/../Utils/ExtractFromTime.cpp
184184
${CMAKE_CURRENT_SOURCE_DIR}/../Utils/StringLike.cpp
185+
${CMAKE_CURRENT_SOURCE_DIR}/../Shared/quantile.h
185186
GroupByRuntime.cpp
186187
TopKRuntime.cpp)
187188

omniscidb/QueryEngine/CalciteDeserializerUtils.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@
2626

2727
const hdk::ir::Type* get_agg_type(hdk::ir::AggType agg_kind,
2828
const hdk::ir::Expr* arg_expr,
29-
bool bigint_count) {
29+
bool bigint_count,
30+
hdk::ir::Interpolation interpolation) {
3031
auto& ctx = arg_expr ? arg_expr->type()->ctx() : hdk::ir::Context::defaultCtx();
3132
switch (agg_kind) {
3233
case hdk::ir::AggType::kCount:
@@ -51,6 +52,14 @@ const hdk::ir::Type* get_agg_type(hdk::ir::AggType agg_kind,
5152
return arg_expr->type();
5253
case hdk::ir::AggType::kTopK:
5354
return ctx.arrayVarLen(arg_expr->type(), 4, false);
55+
case hdk::ir::AggType::kQuantile:
56+
if (interpolation == hdk::ir::Interpolation::kMidpoint ||
57+
interpolation == hdk::ir::Interpolation::kLinear) {
58+
if (arg_expr->type()->isInteger()) {
59+
return ctx.fp64();
60+
}
61+
}
62+
return arg_expr->type()->canonicalize();
5463
default:
5564
CHECK(false);
5665
}

omniscidb/QueryEngine/CalciteDeserializerUtils.h

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -134,9 +134,31 @@ inline hdk::ir::AggType to_agg_kind(const std::string& agg_name) {
134134
if (agg_name == std::string("TOP_K")) {
135135
return hdk::ir::AggType::kTopK;
136136
}
137+
if (agg_name == std::string("QUANTILE")) {
138+
return hdk::ir::AggType::kQuantile;
139+
}
137140
throw std::runtime_error("Aggregate function " + agg_name + " not supported");
138141
}
139142

143+
inline hdk::ir::Interpolation to_interpolation(const std::string& interpolation) {
144+
if (interpolation == std::string("LOWER")) {
145+
return hdk::ir::Interpolation::kLower;
146+
}
147+
if (interpolation == std::string("HIGHER")) {
148+
return hdk::ir::Interpolation::kHigher;
149+
}
150+
if (interpolation == std::string("NEAREST")) {
151+
return hdk::ir::Interpolation::kNearest;
152+
}
153+
if (interpolation == std::string("MIDPOINT")) {
154+
return hdk::ir::Interpolation::kMidpoint;
155+
}
156+
if (interpolation == std::string("LINEAR")) {
157+
return hdk::ir::Interpolation::kLinear;
158+
}
159+
throw std::runtime_error("Interpolation " + interpolation + " is not supported");
160+
}
161+
140162
namespace hdk::ir {
141163

142164
class Constant;
@@ -145,9 +167,11 @@ class Type;
145167

146168
} // namespace hdk::ir
147169

148-
const hdk::ir::Type* get_agg_type(hdk::ir::AggType agg_kind,
149-
const hdk::ir::Expr* arg_expr,
150-
bool bigint_count);
170+
const hdk::ir::Type* get_agg_type(
171+
hdk::ir::AggType agg_kind,
172+
const hdk::ir::Expr* arg_expr,
173+
bool bigint_count,
174+
hdk::ir::Interpolation interpolation = hdk::ir::Interpolation::kLower);
151175

152176
hdk::ir::DateExtractField to_datepart_field(const std::string&);
153177

0 commit comments

Comments
 (0)