Skip to content

Commit 637fdab

Browse files
authored
Adds the COLL_* functions (COLL_AVG, COLL_COUNT, COLL_MAX, COLL_MIN, COLL_SUM) (#353)
1 parent 425d3cc commit 637fdab

File tree

7 files changed

+492
-5
lines changed

7 files changed

+492
-5
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2020
- Add `partiql-extension-ion` extension for encoding/decoding `Value` to/from Ion data
2121
- Add `partiql-extension-ion-functions` extension which contains an extension function for reading from an Ion file
2222
- Add `partiql-catalog` including an experimental `Catalog` interface and implementation
23+
- Implements the `COLL_*` functions -- `COLL_AVG`, `COLL_COUNT`, `COLL_MAX`, `COLL_MIN`, `COLL_SUM`
2324
### Fixes
2425
- Fix parsing of `EXTRACT` datetime parts `YEAR`, `TIMEZONE_HOUR`, and `TIMEZONE_MINUTE`
2526
- Fix logical plan to eval plan conversion for `EvalOrderBySortSpec` with arguments `DESC` and `NULLS LAST`

partiql-eval/src/eval/evaluable.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1282,3 +1282,10 @@ impl EvalExpr for EvalSubQueryExpr {
12821282
Cow::Owned(value)
12831283
}
12841284
}
1285+
1286+
/// Indicates if a set should be reduced to its distinct elements or not.
1287+
#[derive(Debug)]
1288+
pub(crate) enum SetQuantifier {
1289+
All,
1290+
Distinct,
1291+
}

partiql-eval/src/eval/expr/mod.rs

Lines changed: 250 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use crate::env::Bindings;
2+
use crate::eval::evaluable::SetQuantifier;
23
use crate::eval::expr::pattern_match::like_to_re_pattern;
34
use crate::eval::EvalContext;
45
use itertools::Itertools;
@@ -1149,6 +1150,255 @@ impl EvalExpr for EvalFnExtractTimezoneMinute {
11491150
}
11501151
}
11511152

1153+
/// Represents the `COLL_AVG` function, e.g. `COLL_AVG(DISTINCT [1, 2, 2, 3])`.
1154+
#[derive(Debug)]
1155+
pub(crate) struct EvalFnCollAvg {
1156+
pub(crate) setq: SetQuantifier,
1157+
pub(crate) elems: Box<dyn EvalExpr>,
1158+
}
1159+
1160+
#[inline]
1161+
#[track_caller]
1162+
fn coll_avg(elems: Vec<&Value>) -> Value {
1163+
if elems.is_empty() {
1164+
Null
1165+
} else {
1166+
let count = elems.len();
1167+
let mut sum = Value::from(0);
1168+
for e in elems {
1169+
if e.is_number() {
1170+
sum = &sum + e
1171+
} else {
1172+
return Missing;
1173+
}
1174+
}
1175+
&sum / &Value::Decimal(rust_decimal::Decimal::from(count))
1176+
}
1177+
}
1178+
1179+
impl EvalExpr for EvalFnCollAvg {
1180+
fn evaluate<'a>(&'a self, bindings: &'a Tuple, ctx: &'a dyn EvalContext) -> Cow<'a, Value> {
1181+
let elems = self.elems.evaluate(bindings, ctx);
1182+
let result = match elems.borrow() {
1183+
Null => Null,
1184+
Value::List(l) => {
1185+
let l_nums: Vec<&Value> = match self.setq {
1186+
SetQuantifier::All => l.iter().filter(|&e| !e.is_null_or_missing()).collect(),
1187+
SetQuantifier::Distinct => l
1188+
.iter()
1189+
.filter(|&e| !e.is_null_or_missing())
1190+
.unique()
1191+
.collect(),
1192+
};
1193+
coll_avg(l_nums)
1194+
}
1195+
Value::Bag(b) => {
1196+
let b_nums: Vec<&Value> = match self.setq {
1197+
SetQuantifier::All => b.iter().filter(|&e| !e.is_null_or_missing()).collect(),
1198+
SetQuantifier::Distinct => b
1199+
.iter()
1200+
.filter(|&e| !e.is_null_or_missing())
1201+
.unique()
1202+
.collect(),
1203+
};
1204+
coll_avg(b_nums)
1205+
}
1206+
_ => Missing,
1207+
};
1208+
Cow::Owned(result)
1209+
}
1210+
}
1211+
1212+
/// Represents the `COLL_COUNT` function, e.g. `COLL_COUNT(DISTINCT [1, 2, 2, 3])`.
1213+
#[derive(Debug)]
1214+
pub(crate) struct EvalFnCollCount {
1215+
pub(crate) setq: SetQuantifier,
1216+
pub(crate) elems: Box<dyn EvalExpr>,
1217+
}
1218+
1219+
impl EvalExpr for EvalFnCollCount {
1220+
fn evaluate<'a>(&'a self, bindings: &'a Tuple, ctx: &'a dyn EvalContext) -> Cow<'a, Value> {
1221+
let elems = self.elems.evaluate(bindings, ctx);
1222+
let result = match elems.borrow() {
1223+
Null => Null,
1224+
Value::List(l) => {
1225+
let l_nums: Vec<&Value> = match self.setq {
1226+
SetQuantifier::All => l.iter().filter(|&e| !e.is_null_or_missing()).collect(),
1227+
SetQuantifier::Distinct => l
1228+
.iter()
1229+
.filter(|&e| !e.is_null_or_missing())
1230+
.unique()
1231+
.collect(),
1232+
};
1233+
Value::from(l_nums.len())
1234+
}
1235+
Value::Bag(b) => {
1236+
let b_nums: Vec<&Value> = match self.setq {
1237+
SetQuantifier::All => b.iter().filter(|&e| !e.is_null_or_missing()).collect(),
1238+
SetQuantifier::Distinct => b
1239+
.iter()
1240+
.filter(|&e| !e.is_null_or_missing())
1241+
.unique()
1242+
.collect(),
1243+
};
1244+
Value::from(b_nums.len())
1245+
}
1246+
_ => Missing,
1247+
};
1248+
Cow::Owned(result)
1249+
}
1250+
}
1251+
1252+
/// Represents the `COLL_MAX` function, e.g. `COLL_MAX(DISTINCT [1, 2, 2, 3])`.
1253+
#[derive(Debug)]
1254+
pub(crate) struct EvalFnCollMax {
1255+
pub(crate) setq: SetQuantifier,
1256+
pub(crate) elems: Box<dyn EvalExpr>,
1257+
}
1258+
1259+
#[inline]
1260+
#[track_caller]
1261+
fn coll_max(elems: Vec<&Value>) -> Value {
1262+
elems.into_iter().max().unwrap_or(&Null).to_owned()
1263+
}
1264+
1265+
impl EvalExpr for EvalFnCollMax {
1266+
fn evaluate<'a>(&'a self, bindings: &'a Tuple, ctx: &'a dyn EvalContext) -> Cow<'a, Value> {
1267+
let elems = self.elems.evaluate(bindings, ctx);
1268+
let result = match elems.borrow() {
1269+
Null => Null,
1270+
Value::List(l) => {
1271+
let l_nums: Vec<&Value> = match self.setq {
1272+
SetQuantifier::All => l.iter().filter(|&e| !e.is_null_or_missing()).collect(),
1273+
SetQuantifier::Distinct => l
1274+
.iter()
1275+
.filter(|&e| !e.is_null_or_missing())
1276+
.unique()
1277+
.collect(),
1278+
};
1279+
coll_max(l_nums)
1280+
}
1281+
Value::Bag(b) => {
1282+
let b_nums: Vec<&Value> = match self.setq {
1283+
SetQuantifier::All => b.iter().filter(|&e| !e.is_null_or_missing()).collect(),
1284+
SetQuantifier::Distinct => b
1285+
.iter()
1286+
.filter(|&e| !e.is_null_or_missing())
1287+
.unique()
1288+
.collect(),
1289+
};
1290+
coll_max(b_nums)
1291+
}
1292+
_ => Missing,
1293+
};
1294+
Cow::Owned(result)
1295+
}
1296+
}
1297+
1298+
/// Represents the `COLL_MIN` function, e.g. `COLL_MIN(DISTINCT [1, 2, 2, 3])`.
1299+
#[derive(Debug)]
1300+
pub(crate) struct EvalFnCollMin {
1301+
pub(crate) setq: SetQuantifier,
1302+
pub(crate) elems: Box<dyn EvalExpr>,
1303+
}
1304+
1305+
#[inline]
1306+
#[track_caller]
1307+
fn coll_min(elems: Vec<&Value>) -> Value {
1308+
elems.into_iter().min().unwrap_or(&Null).to_owned()
1309+
}
1310+
1311+
impl EvalExpr for EvalFnCollMin {
1312+
fn evaluate<'a>(&'a self, bindings: &'a Tuple, ctx: &'a dyn EvalContext) -> Cow<'a, Value> {
1313+
let elems = self.elems.evaluate(bindings, ctx);
1314+
let result = match elems.borrow() {
1315+
Null => Null,
1316+
Value::List(l) => {
1317+
let l_nums: Vec<&Value> = match self.setq {
1318+
SetQuantifier::All => l.iter().filter(|&e| !e.is_null_or_missing()).collect(),
1319+
SetQuantifier::Distinct => l
1320+
.iter()
1321+
.filter(|&e| !e.is_null_or_missing())
1322+
.unique()
1323+
.collect(),
1324+
};
1325+
coll_min(l_nums)
1326+
}
1327+
Value::Bag(b) => {
1328+
let b_nums: Vec<&Value> = match self.setq {
1329+
SetQuantifier::All => b.iter().filter(|&e| !e.is_null_or_missing()).collect(),
1330+
SetQuantifier::Distinct => b
1331+
.iter()
1332+
.filter(|&e| !e.is_null_or_missing())
1333+
.unique()
1334+
.collect(),
1335+
};
1336+
coll_min(b_nums)
1337+
}
1338+
_ => Missing,
1339+
};
1340+
Cow::Owned(result)
1341+
}
1342+
}
1343+
1344+
/// Represents the `COLL_SUM` function, e.g. `COLL_SUM(DISTINCT [1, 2, 2, 3])`.
1345+
#[derive(Debug)]
1346+
pub(crate) struct EvalFnCollSum {
1347+
pub(crate) setq: SetQuantifier,
1348+
pub(crate) elems: Box<dyn EvalExpr>,
1349+
}
1350+
1351+
#[inline]
1352+
#[track_caller]
1353+
fn coll_sum(elems: Vec<&Value>) -> Value {
1354+
if elems.is_empty() {
1355+
Null
1356+
} else {
1357+
let mut sum = Value::from(0);
1358+
for e in elems {
1359+
if e.is_number() {
1360+
sum = &sum + e
1361+
} else {
1362+
return Missing;
1363+
}
1364+
}
1365+
sum
1366+
}
1367+
}
1368+
1369+
impl EvalExpr for EvalFnCollSum {
1370+
fn evaluate<'a>(&'a self, bindings: &'a Tuple, ctx: &'a dyn EvalContext) -> Cow<'a, Value> {
1371+
let elems = self.elems.evaluate(bindings, ctx);
1372+
let result = match elems.borrow() {
1373+
Null => Null,
1374+
Value::List(l) => {
1375+
let l_nums: Vec<&Value> = match self.setq {
1376+
SetQuantifier::All => l.iter().filter(|&e| !e.is_null_or_missing()).collect(),
1377+
SetQuantifier::Distinct => l
1378+
.iter()
1379+
.filter(|&e| !e.is_null_or_missing())
1380+
.unique()
1381+
.collect(),
1382+
};
1383+
coll_sum(l_nums)
1384+
}
1385+
Value::Bag(b) => {
1386+
let b_nums: Vec<&Value> = match self.setq {
1387+
SetQuantifier::All => b.iter().filter(|&e| !e.is_null_or_missing()).collect(),
1388+
SetQuantifier::Distinct => b
1389+
.iter()
1390+
.filter(|&e| !e.is_null_or_missing())
1391+
.unique()
1392+
.collect(),
1393+
};
1394+
coll_sum(b_nums)
1395+
}
1396+
_ => Missing,
1397+
};
1398+
Cow::Owned(result)
1399+
}
1400+
}
1401+
11521402
/// Represents a Base Table Expr
11531403
#[derive(Debug)]
11541404
pub(crate) struct EvalFnBaseTableExpr {

partiql-eval/src/plan.rs

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ use partiql_logical as logical;
66

77
use partiql_logical::{
88
AggFunc, BinaryOp, BindingsOp, CallName, GroupingStrategy, IsTypeExpr, JoinKind, LogicalPlan,
9-
OpId, PathComponent, Pattern, PatternMatchExpr, SearchedCase, SortSpecNullOrder, SortSpecOrder,
10-
Type, UnaryOp, ValueExpr,
9+
OpId, PathComponent, Pattern, PatternMatchExpr, SearchedCase, SetQuantifier, SortSpecNullOrder,
10+
SortSpecOrder, Type, UnaryOp, ValueExpr,
1111
};
1212

1313
use crate::eval;
@@ -19,7 +19,8 @@ use crate::eval::expr::pattern_match::like_to_re_pattern;
1919
use crate::eval::expr::{
2020
EvalBagExpr, EvalBetweenExpr, EvalBinOp, EvalBinOpExpr, EvalDynamicLookup, EvalExpr, EvalFnAbs,
2121
EvalFnBaseTableExpr, EvalFnBitLength, EvalFnBtrim, EvalFnCardinality, EvalFnCharLength,
22-
EvalFnExists, EvalFnExtractDay, EvalFnExtractHour, EvalFnExtractMinute, EvalFnExtractMonth,
22+
EvalFnCollAvg, EvalFnCollCount, EvalFnCollMax, EvalFnCollMin, EvalFnCollSum, EvalFnExists,
23+
EvalFnExtractDay, EvalFnExtractHour, EvalFnExtractMinute, EvalFnExtractMonth,
2324
EvalFnExtractSecond, EvalFnExtractTimezoneHour, EvalFnExtractTimezoneMinute, EvalFnExtractYear,
2425
EvalFnLower, EvalFnLtrim, EvalFnModulus, EvalFnOctetLength, EvalFnOverlay, EvalFnPosition,
2526
EvalFnRtrim, EvalFnSubstring, EvalFnUpper, EvalIsTypeExpr, EvalLikeMatch,
@@ -34,6 +35,13 @@ pub struct EvaluatorPlanner<'c> {
3435
catalog: &'c dyn Catalog,
3536
}
3637

38+
fn plan_set_quantifier(setq: &logical::SetQuantifier) -> eval::evaluable::SetQuantifier {
39+
match setq {
40+
SetQuantifier::All => eval::evaluable::SetQuantifier::All,
41+
SetQuantifier::Distinct => eval::evaluable::SetQuantifier::Distinct,
42+
}
43+
}
44+
3745
impl<'c> EvaluatorPlanner<'c> {
3846
pub fn new(catalog: &'c dyn Catalog) -> Self {
3947
EvaluatorPlanner { catalog }
@@ -639,6 +647,41 @@ impl<'c> EvaluatorPlanner<'c> {
639647
value: args.pop().unwrap(),
640648
})
641649
}
650+
CallName::CollAvg(setq) => {
651+
assert_eq!(args.len(), 1);
652+
Box::new(EvalFnCollAvg {
653+
setq: plan_set_quantifier(setq),
654+
elems: args.pop().unwrap(),
655+
})
656+
}
657+
CallName::CollCount(setq) => {
658+
assert_eq!(args.len(), 1);
659+
Box::new(EvalFnCollCount {
660+
setq: plan_set_quantifier(setq),
661+
elems: args.pop().unwrap(),
662+
})
663+
}
664+
CallName::CollMax(setq) => {
665+
assert_eq!(args.len(), 1);
666+
Box::new(EvalFnCollMax {
667+
setq: plan_set_quantifier(setq),
668+
elems: args.pop().unwrap(),
669+
})
670+
}
671+
CallName::CollMin(setq) => {
672+
assert_eq!(args.len(), 1);
673+
Box::new(EvalFnCollMin {
674+
setq: plan_set_quantifier(setq),
675+
elems: args.pop().unwrap(),
676+
})
677+
}
678+
CallName::CollSum(setq) => {
679+
assert_eq!(args.len(), 1);
680+
Box::new(EvalFnCollSum {
681+
setq: plan_set_quantifier(setq),
682+
elems: args.pop().unwrap(),
683+
})
684+
}
642685
CallName::ByName(name) => {
643686
let function = self
644687
.catalog

0 commit comments

Comments
 (0)