Skip to content

Commit 8197b2d

Browse files
author
longshan.lu
committed
WIP
1 parent 2d4b952 commit 8197b2d

File tree

9 files changed

+503
-51
lines changed

9 files changed

+503
-51
lines changed

qurious/src/datatypes/scalar.rs

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@ use crate::error::{Error, Result};
22
use arrow::{
33
array::{
44
new_null_array, Array, ArrayRef, ArrowPrimitiveType, BooleanArray, Decimal128Array, Decimal256Array,
5-
Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, LargeStringArray, PrimitiveArray,
6-
StringArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
5+
Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, IntervalMonthDayNanoArray,
6+
LargeStringArray, PrimitiveArray, StringArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array,
77
},
8-
datatypes::{i256, DataType, Field},
8+
datatypes::{i256, DataType, Field, IntervalMonthDayNano, IntervalUnit},
99
};
1010
use std::any::type_name;
1111
use std::{fmt::Display, sync::Arc};
@@ -99,6 +99,10 @@ pub enum ScalarValue {
9999
Decimal128(Option<i128>, u8, i8),
100100
/// 256bit decimal, using the i256 to represent the decimal, precision scale
101101
Decimal256(Option<i256>, u8, i8),
102+
/// A triple of the number of elapsed months, days, and nanoseconds.
103+
/// Months and days are encoded as 32-bit signed integers.
104+
/// Nanoseconds is encoded as a 64-bit signed integer (no leap seconds).
105+
IntervalMonthDayNano(Option<IntervalMonthDayNano>),
102106
Utf8(Option<String>),
103107
}
104108

@@ -130,6 +134,11 @@ impl ScalarValue {
130134
ScalarValue::Utf8(_) => Field::new("utf8", DataType::Utf8, true),
131135
ScalarValue::Decimal128(_, p, s) => Field::new("decimal128", DataType::Decimal128(*p, *s), true),
132136
ScalarValue::Decimal256(_, p, s) => Field::new("decimal256", DataType::Decimal256(*p, *s), true),
137+
ScalarValue::IntervalMonthDayNano(_) => Field::new(
138+
"interval_month_day_nano",
139+
DataType::Interval(IntervalUnit::MonthDayNano),
140+
true,
141+
),
133142
}
134143
}
135144

@@ -150,6 +159,7 @@ impl ScalarValue {
150159
ScalarValue::Utf8(_) => DataType::Utf8,
151160
ScalarValue::Decimal128(_, p, s) => DataType::Decimal128(*p, *s),
152161
ScalarValue::Decimal256(_, p, s) => DataType::Decimal256(*p, *s),
162+
ScalarValue::IntervalMonthDayNano(_) => DataType::Interval(IntervalUnit::MonthDayNano),
153163
}
154164
}
155165

@@ -174,6 +184,9 @@ impl ScalarValue {
174184
ScalarValue::Decimal256(v, p, s) => {
175185
Arc::new(build_decimal_array!(*v, Decimal256Array, num_row, *p, *s)) as ArrayRef
176186
}
187+
ScalarValue::IntervalMonthDayNano(v) => {
188+
Arc::new(IntervalMonthDayNanoArray::from(vec![*v; num_row])) as ArrayRef
189+
}
177190
})
178191
}
179192

@@ -275,6 +288,9 @@ impl Display for ScalarValue {
275288
ScalarValue::Decimal128(v, p, s) => format_decimal!(f, v, "Decimal128", p, s),
276289
ScalarValue::Decimal256(v, p, s) => format_decimal!(f, v, "Decimal256", p, s),
277290
ScalarValue::Utf8(v) => format_string!(f, v, "Utf8"),
291+
ScalarValue::IntervalMonthDayNano(v) => {
292+
format_string!(f, v.map(|v| format!("{:?}", v)), "IntervalMonthDayNano")
293+
}
278294
}
279295
}
280296
}

qurious/src/execution/session.rs

Lines changed: 18 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ impl ExecuteSession {
9898
LogicalPlan::Dml(stmt) => self.execute_dml(stmt),
9999
plan => {
100100
let plan = self.optimizer.optimize(plan)?;
101+
println!("plan: {}", crate::utils::format(&plan, 0));
101102
self.planner.create_physical_plan(&plan)?.execute()
102103
}
103104
}
@@ -385,49 +386,26 @@ mod tests {
385386
.sql(
386387
"
387388
select
388-
s_acctbal,
389-
s_name,
390-
n_name,
391-
p_partkey,
392-
p_mfgr,
393-
s_address,
394-
s_phone,
395-
s_comment
389+
o_orderpriority,
390+
count(*) as order_count
396391
from
397-
part,
398-
supplier,
399-
partsupp,
400-
nation,
401-
region
392+
orders
402393
where
403-
p_partkey = ps_partkey
404-
and s_suppkey = ps_suppkey
405-
and p_size = 15
406-
and p_type like '%BRASS'
407-
and s_nationkey = n_nationkey
408-
and n_regionkey = r_regionkey
409-
and r_name = 'EUROPE'
410-
and ps_supplycost = (
411-
select
412-
min(ps_supplycost)
413-
from
414-
partsupp,
415-
supplier,
416-
nation,
417-
region
418-
where
419-
p_partkey = ps_partkey
420-
and s_suppkey = ps_suppkey
421-
and s_nationkey = n_nationkey
422-
and n_regionkey = r_regionkey
423-
and r_name = 'EUROPE'
424-
)
394+
o_orderdate >= '1993-07-01'
395+
and o_orderdate < date '1993-07-01' + interval '3' month
396+
and exists (
397+
select
398+
*
399+
from
400+
lineitem
401+
where
402+
l_orderkey = o_orderkey
403+
and l_commitdate < l_receiptdate
404+
)
405+
group by
406+
o_orderpriority
425407
order by
426-
s_acctbal desc,
427-
n_name,
428-
s_name,
429-
p_partkey
430-
limit 10;
408+
o_orderpriority;
431409
",
432410
)
433411
.unwrap();

qurious/src/logical/expr/mod.rs

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ pub enum LogicalExpr {
4646
Like(Like),
4747
Negative(Box<LogicalExpr>),
4848
SubQuery(SubQuery),
49+
Exists(Exists),
4950
}
5051

5152
macro_rules! impl_logical_expr_methods {
@@ -83,6 +84,7 @@ impl_logical_expr_methods! {
8384
impl Display for LogicalExpr {
8485
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
8586
match self {
87+
LogicalExpr::Exists(Exists { subquery, negated }) => write!(f, "{} EXISTS ({})", if *negated { "NOT" } else { "" }, subquery),
8688
LogicalExpr::Negative(e) => write!(f, "- {}", e),
8789
LogicalExpr::Literal(v) => write!(f, "{}", v),
8890
LogicalExpr::Wildcard => write!(f, "*"),
@@ -213,6 +215,7 @@ impl LogicalExpr {
213215
LogicalExpr::SortExpr(SortExpr { expr, .. }) | LogicalExpr::Negative(expr) => expr.data_type(schema),
214216
LogicalExpr::Like(_) | LogicalExpr::IsNull(_) | LogicalExpr::IsNotNull(_) => Ok(DataType::Boolean),
215217
LogicalExpr::SubQuery(subquery) => Ok(subquery.subquery.schema().fields[0].data_type().clone()),
218+
LogicalExpr::Exists(_) => Ok(DataType::Boolean),
216219
_ => internal_err!("[{}] has no data type", self),
217220
}
218221
}
@@ -233,6 +236,12 @@ impl LogicalExpr {
233236
impl TransformNode for LogicalExpr {
234237
fn map_children<F: FnMut(Self) -> Result<Transformed<Self>>>(self, mut f: F) -> Result<Transformed<Self>> {
235238
Ok(match self {
239+
LogicalExpr::Exists(Exists { subquery, negated }) => subquery.map_exprs(f)?.update(|subquery| {
240+
LogicalExpr::Exists(Exists {
241+
subquery: Box::new(subquery),
242+
negated,
243+
})
244+
}),
236245
LogicalExpr::Alias(Alias { expr, name }) => f(*expr)?.update(|expr| {
237246
LogicalExpr::Alias(Alias {
238247
expr: Box::new(expr),
@@ -312,7 +321,11 @@ impl TransformNode for LogicalExpr {
312321
| LogicalExpr::IsNull(expr)
313322
| LogicalExpr::IsNotNull(expr)
314323
| LogicalExpr::Alias(Alias { expr, .. }) => vec![expr.as_ref()],
315-
LogicalExpr::SubQuery(_) | LogicalExpr::Wildcard | LogicalExpr::Column(_) | LogicalExpr::Literal(_) => {
324+
LogicalExpr::Exists(_)
325+
| LogicalExpr::SubQuery(_)
326+
| LogicalExpr::Wildcard
327+
| LogicalExpr::Column(_)
328+
| LogicalExpr::Literal(_) => {
316329
vec![]
317330
}
318331
LogicalExpr::Like(like) => vec![like.expr.as_ref(), like.pattern.as_ref()],
@@ -336,6 +349,12 @@ pub(crate) fn get_expr_value(expr: LogicalExpr) -> Result<i64> {
336349
}
337350
}
338351

352+
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
353+
pub struct Exists {
354+
pub negated: bool,
355+
pub subquery: Box<LogicalPlan>,
356+
}
357+
339358
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
340359
pub struct Like {
341360
pub negated: bool,

qurious/src/planner/sql.rs

Lines changed: 56 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,15 @@
11
use std::{collections::HashMap, sync::Arc};
22

3-
use arrow::datatypes::{Field, Schema, TimeUnit};
4-
use sqlparser::ast::{
5-
Assignment, BinaryOperator, CopyOption, CopySource, CopyTarget, Cte, Expression, From, FunctionArgument, Ident,
6-
Literal, Order, Select, SelectItem, Statement,
3+
use arrow::{
4+
compute::kernels::cast_utils::{parse_interval_month_day_nano_config, IntervalParseConfig, IntervalUnit},
5+
datatypes::{Field, Schema, TimeUnit},
6+
};
7+
use sqlparser::{
8+
ast::{
9+
Assignment, BinaryOperator, CopyOption, CopySource, CopyTarget, Cte, Expression, From, FunctionArgument, Ident,
10+
Literal, Order, Select, SelectItem, Statement,
11+
},
12+
datatype::IntervalFields,
713
};
814

915
use crate::{
@@ -1015,10 +1021,28 @@ impl<'a> SqlQueryPlanner<'a> {
10151021
expr: Box::new(self.sql_to_expr(*left)?),
10161022
pattern: Box::new(self.sql_to_expr(*right)?),
10171023
})),
1024+
Expression::Exists { subquery, negated } => Ok(LogicalExpr::Exists(Exists {
1025+
negated,
1026+
subquery: Box::new(self.new_context_scope(|planner| planner.select_to_plan(*subquery))?),
1027+
})),
1028+
Expression::Interval { expr, field } => self.interval_to_expr(*expr, field),
10181029
_ => todo!("sql_to_expr: {:?}", expr),
10191030
}
10201031
}
10211032

1033+
fn interval_to_expr(&mut self, expr: Expression, field: IntervalFields) -> Result<LogicalExpr> {
1034+
match expr {
1035+
Expression::BinaryOperator(binary_op) => self.parse_binary_op(binary_op),
1036+
val => {
1037+
let interval_val = format!("{} {}", val, field);
1038+
let config = IntervalParseConfig::new(IntervalUnit::Second);
1039+
let val = parse_interval_month_day_nano_config(&interval_val, config)?;
1040+
1041+
Ok(LogicalExpr::Literal(ScalarValue::IntervalMonthDayNano(Some(val))))
1042+
}
1043+
}
1044+
}
1045+
10221046
fn handle_function(&self, name: &str, mut args: Vec<LogicalExpr>) -> Result<LogicalExpr> {
10231047
if let Some(udf) = self.udfs.get(name.to_uppercase().as_str()) {
10241048
return Ok(LogicalExpr::Function(Function {
@@ -1285,6 +1309,34 @@ mod tests {
12851309

12861310
use super::SqlQueryPlanner;
12871311

1312+
#[test]
1313+
fn test_interval() {
1314+
quick_test(
1315+
"SELECT INTERVAL '1' YEAR;",
1316+
"Projection: (IntervalMonthDayNano('IntervalMonthDayNano { months: 12, days: 0, nanoseconds: 0 }'))\n Empty Relation\n",
1317+
);
1318+
1319+
quick_test(
1320+
"SELECT INTERVAL '1' MONTH;",
1321+
"Projection: (IntervalMonthDayNano('IntervalMonthDayNano { months: 1, days: 0, nanoseconds: 0 }'))\n Empty Relation\n",
1322+
);
1323+
1324+
quick_test(
1325+
"SELECT INTERVAL '1' DAY;",
1326+
"Projection: (IntervalMonthDayNano('IntervalMonthDayNano { months: 0, days: 1, nanoseconds: 0 }'))\n Empty Relation\n",
1327+
);
1328+
1329+
quick_test("SELECT DATE '1993-07-01' + INTERVAL '3' month", "Projection: (CAST(Utf8('1993-07-01') AS Date32) + IntervalMonthDayNano('IntervalMonthDayNano { months: 3, days: 0, nanoseconds: 0 }'))\n Empty Relation\n");
1330+
}
1331+
1332+
#[test]
1333+
fn test_exists() {
1334+
quick_test(
1335+
"SELECT * FROM person WHERE EXISTS (SELECT * FROM other_tbl WHERE name = first_name)",
1336+
"",
1337+
);
1338+
}
1339+
12881340
#[test]
12891341
fn test_outer_field_reference() {
12901342
quick_test(

qurious/tests/tpch/q4.slt

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
query TI
2+
select
3+
o_orderpriority,
4+
count(*) as order_count
5+
from
6+
orders
7+
where
8+
o_orderdate >= '1993-07-01'
9+
and o_orderdate < date '1993-07-01' + interval '3' month
10+
and exists (
11+
select
12+
*
13+
from
14+
lineitem
15+
where
16+
l_orderkey = o_orderkey
17+
and l_commitdate < l_receiptdate
18+
)
19+
group by
20+
o_orderpriority
21+
order by
22+
o_orderpriority;
23+
----
24+
1-URGENT 999
25+
2-HIGH 997
26+
3-MEDIUM 1031
27+
4-NOT SPECIFIED 989
28+
5-LOW 1077

sqlparser/src/ast.rs

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
1-
use crate::{datatype::DataType, error::Error};
1+
use crate::{
2+
datatype::{DataType, IntervalFields},
3+
error::Error,
4+
};
25
use std::fmt::{Display, Formatter};
36

47
#[derive(Clone, PartialEq, Debug)]
@@ -670,6 +673,17 @@ pub enum Expression {
670673
left: Box<Expression>,
671674
right: Box<Expression>,
672675
},
676+
/// `INTERVAL <expr> <field>`
677+
Interval {
678+
expr: Box<Expression>,
679+
field: IntervalFields,
680+
},
681+
/// An exists expression `[ NOT ] EXISTS(SELECT ...)`, used in expressions like
682+
/// `WHERE [ NOT ] EXISTS (SELECT ...)`.
683+
Exists {
684+
subquery: Box<Select>,
685+
negated: bool,
686+
},
673687
}
674688

675689
#[derive(Clone, PartialEq, Eq, Debug)]
@@ -797,6 +811,10 @@ impl Display for Expression {
797811
Expression::Like { negated, left, right } => {
798812
write!(f, "{} {} LIKE {}", left, if *negated { "NOT" } else { "" }, right)
799813
}
814+
Expression::Interval { expr, field } => write!(f, "INTERVAL {} {}", expr, field),
815+
Expression::Exists { subquery, negated } => {
816+
write!(f, "{} EXISTS ({})", if *negated { "NOT" } else { "" }, subquery)
817+
}
800818
}
801819
}
802820
}

0 commit comments

Comments
 (0)