Skip to content

Commit 243437b

Browse files
joseph-isaacsmwlon
authored andcommitted
feat[vortex-duckdb-ext]: support enough scalar expr and array conversion for tpch & clickbench (vortex-data#3570)
--------- Signed-off-by: Joe Isaacs <joe.isaacs@live.co.uk> Signed-off-by: mwlon <m.w.loncaric@gmail.com>
1 parent 35a6fc9 commit 243437b

File tree

14 files changed

+236
-53
lines changed

14 files changed

+236
-53
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-duckdb-ext/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ duckdb = { git = "https://github.com/vortex-data/duckdb-rs", rev = "247ffb36c41b
3131
] }
3232
glob = { workspace = true }
3333
itertools = { workspace = true }
34+
log = { workspace = true }
3435
num-traits = { workspace = true }
3536
tempfile = { workspace = true }
3637
tokio = { workspace = true, features = ["macros", "rt"] }

vortex-duckdb-ext/cpp/expr.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
1-
#include "duckdb/planner/expression.hpp"
2-
31
#include "duckdb_vx.h"
42
#include "duckdb/planner/expression/bound_between_expression.hpp"
53
#include "duckdb/planner/expression/bound_columnref_expression.hpp"
64
#include "duckdb/planner/expression/bound_comparison_expression.hpp"
75
#include "duckdb/planner/expression/bound_constant_expression.hpp"
86
#include "duckdb/planner/expression/bound_function_expression.hpp"
97
#include "duckdb/planner/expression/bound_operator_expression.hpp"
8+
#include "duckdb/planner/expression/bound_conjunction_expression.hpp"
109

1110
using namespace duckdb;
1211

@@ -66,6 +65,18 @@ extern "C" void duckdb_vx_expr_get_bound_comparison(duckdb_vx_expr ffi_expr,
6665
out->type = static_cast<duckdb_vx_expr_type>(expr.type);
6766
}
6867

68+
extern "C" void duckdb_vx_expr_get_bound_conjunction(duckdb_vx_expr ffi_expr,
69+
duckdb_vx_expr_bound_conjunction *out) {
70+
if (!ffi_expr || !out) {
71+
return;
72+
}
73+
74+
auto &expr = reinterpret_cast<Expression *>(ffi_expr)->Cast<BoundConjunctionExpression>();
75+
out->children_count = expr.children.size();
76+
out->children = reinterpret_cast<duckdb_vx_expr *>(expr.children.data());
77+
out->type = static_cast<duckdb_vx_expr_type>(expr.type);
78+
}
79+
6980
extern "C" void duckdb_vx_expr_get_bound_between(duckdb_vx_expr ffi_expr, duckdb_vx_expr_bound_between *out) {
7081
if (!ffi_expr || !out) {
7182
return;

vortex-duckdb-ext/cpp/include/duckdb_vx/expr.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,14 @@ typedef struct {
218218

219219
void duckdb_vx_expr_get_bound_comparison(duckdb_vx_expr expr, duckdb_vx_expr_bound_comparison *out);
220220

221+
typedef struct {
222+
duckdb_vx_expr *children;
223+
size_t children_count;
224+
duckdb_vx_expr_type type;
225+
} duckdb_vx_expr_bound_conjunction;
226+
227+
void duckdb_vx_expr_get_bound_conjunction(duckdb_vx_expr expr, duckdb_vx_expr_bound_conjunction *out);
228+
221229
typedef struct {
222230
duckdb_vx_expr input;
223231
duckdb_vx_expr lower;

vortex-duckdb-ext/cpp/table_filter.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,6 @@
33
#include "duckdb/planner/filter/conjunction_filter.hpp"
44
#include "duckdb/planner/filter/dynamic_filter.hpp"
55

6-
#include "duckdb/planner/expression/bound_columnref_expression.hpp"
7-
#include <iostream>
8-
96
using namespace duckdb;
107

118
extern "C" idx_t duckdb_vx_table_filter_set_get(duckdb_vx_table_filter_set ffi_filter_set, size_t index,

vortex-duckdb-ext/src/convert/dtype.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -167,8 +167,8 @@ impl FromLogicalType for DType {
167167
DUCKDB_TYPE::DUCKDB_TYPE_INTERVAL => todo!(),
168168
DUCKDB_TYPE::DUCKDB_TYPE_HUGEINT => todo!(),
169169
DUCKDB_TYPE::DUCKDB_TYPE_UHUGEINT => todo!(),
170-
DUCKDB_TYPE::DUCKDB_TYPE_VARCHAR => todo!(),
171-
DUCKDB_TYPE::DUCKDB_TYPE_BLOB => todo!(),
170+
DUCKDB_TYPE::DUCKDB_TYPE_VARCHAR => DType::Utf8(nullability),
171+
DUCKDB_TYPE::DUCKDB_TYPE_BLOB => DType::Binary(nullability),
172172
DUCKDB_TYPE::DUCKDB_TYPE_DECIMAL => todo!(),
173173
DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP_S => todo!(),
174174
DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP_MS => todo!(),

vortex-duckdb-ext/src/convert/expr.rs

Lines changed: 86 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -61,62 +61,90 @@ fn like_pattern_str(value: &Expression) -> VortexResult<Option<String>> {
6161
}
6262
}
6363

64-
pub fn try_from_bound_expression(value: &Expression) -> VortexResult<ExprRef> {
64+
pub fn try_from_bound_expression(value: &Expression) -> VortexResult<Option<ExprRef>> {
6565
let Some(value) = value.as_class() else {
66-
vortex_bail!("no expression class")
66+
vortex_bail!("no expression class id {:?}", value.as_class_id())
6767
};
68-
Ok(match value {
68+
Ok(Some(match value {
6969
ExpressionClass::BoundColumnRef(col) => get_item_scope(col.name.to_str()?),
7070
ExpressionClass::BoundConstant(const_) => lit(Scalar::try_from(const_.value)?),
7171
ExpressionClass::BoundComparison(compare) => {
7272
let operator: Operator = compare.op.try_into()?;
7373

74-
BinaryExpr::new_expr(
75-
try_from_bound_expression(&compare.left)?,
76-
operator,
77-
try_from_bound_expression(&compare.right)?,
78-
)
74+
let Some(left) = try_from_bound_expression(&compare.left)? else {
75+
return Ok(None);
76+
};
77+
let Some(right) = try_from_bound_expression(&compare.right)? else {
78+
return Ok(None);
79+
};
80+
81+
BinaryExpr::new_expr(left, operator, right)
7982
}
80-
ExpressionClass::BoundBetween(between) => Between::between(
81-
try_from_bound_expression(&between.input)?,
82-
try_from_bound_expression(&between.lower)?,
83-
try_from_bound_expression(&between.upper)?,
84-
BetweenOptions {
85-
lower_strict: if between.lower_inclusive {
86-
StrictComparison::NonStrict
87-
} else {
88-
StrictComparison::Strict
89-
},
90-
upper_strict: if between.upper_inclusive {
91-
StrictComparison::NonStrict
92-
} else {
93-
StrictComparison::Strict
83+
ExpressionClass::BoundBetween(between) => {
84+
let Some(array) = try_from_bound_expression(&between.input)? else {
85+
return Ok(None);
86+
};
87+
let Some(lower) = try_from_bound_expression(&between.lower)? else {
88+
return Ok(None);
89+
};
90+
let Some(upper) = try_from_bound_expression(&between.upper)? else {
91+
return Ok(None);
92+
};
93+
Between::between(
94+
array,
95+
lower,
96+
upper,
97+
BetweenOptions {
98+
lower_strict: if between.lower_inclusive {
99+
StrictComparison::NonStrict
100+
} else {
101+
StrictComparison::Strict
102+
},
103+
upper_strict: if between.upper_inclusive {
104+
StrictComparison::NonStrict
105+
} else {
106+
StrictComparison::Strict
107+
},
94108
},
95-
},
96-
),
109+
)
110+
}
97111
ExpressionClass::BoundOperator(operator) => match operator.op {
98112
DUCKDB_VX_EXPR_TYPE::DUCKDB_VX_EXPR_TYPE_OPERATOR_NOT => {
99113
let children = operator.children().collect_vec();
100114
assert_eq!(children.len(), 1);
101-
let child = try_from_bound_expression(&children[0])?;
115+
let Some(child) = try_from_bound_expression(&children[0])? else {
116+
return Ok(None);
117+
};
102118
Not::new_expr(child)
103119
}
104120
DUCKDB_VX_EXPR_TYPE::DUCKDB_VX_EXPR_TYPE_COMPARE_IN => {
105121
// First child is element, rest form the list.
106122
let children = operator.children().collect_vec();
107123
assert!(children.len() >= 2);
108-
let element = try_from_bound_expression(&children[0])?;
124+
let Some(element) = try_from_bound_expression(&children[0])? else {
125+
return Ok(None);
126+
};
109127

110-
let list_elements = children
128+
let Some(list_elements) = children
111129
.iter()
112130
.skip(1)
113131
.map(|c| {
114-
Ok(Literal::maybe_from(&try_from_bound_expression(c)?)
115-
.ok_or_else(|| vortex_err!("cannot have a non literal in a in_list"))?
116-
.value()
117-
.clone())
132+
let Some(value) = try_from_bound_expression(c)? else {
133+
return Ok(None);
134+
};
135+
Ok(Some(
136+
Literal::maybe_from(&value)
137+
.ok_or_else(|| {
138+
vortex_err!("cannot have a non literal in a in_list")
139+
})?
140+
.value()
141+
.clone(),
142+
))
118143
})
119-
.collect::<VortexResult<Vec<_>>>()?;
144+
.collect::<VortexResult<Option<Vec<_>>>>()?
145+
else {
146+
return Ok(None);
147+
};
120148
let list = Scalar::list(
121149
Arc::new(list_elements[0].dtype().clone()),
122150
list_elements,
@@ -130,16 +158,39 @@ pub fn try_from_bound_expression(value: &Expression) -> VortexResult<ExprRef> {
130158
DUCKDB_FUNCTION_NAME_CONTAINS => {
131159
let children = func.children().collect_vec();
132160
assert_eq!(children.len(), 2);
133-
let value = try_from_bound_expression(&children[0])?;
161+
let Some(value) = try_from_bound_expression(&children[0])? else {
162+
return Ok(None);
163+
};
134164
let Some(pattern_lit) = like_pattern_str(&children[1])? else {
135165
vortex_bail!("expected pattern to be bound string")
136166
};
137167
let pattern = Literal::new_expr(pattern_lit);
138168
Like::new_expr(value, pattern, false, false)
139169
}
140-
_ => todo!(),
170+
_ => {
171+
log::warn!("bound function {}", func.scalar_function.name());
172+
return Ok(None);
173+
}
141174
},
142-
})
175+
ExpressionClass::BoundConjunction(conj) => {
176+
let Some(children) = conj
177+
.children()
178+
.map(|c| try_from_bound_expression(&c))
179+
.collect::<VortexResult<Option<Vec<_>>>>()?
180+
else {
181+
return Ok(None);
182+
};
183+
match conj.op {
184+
DUCKDB_VX_EXPR_TYPE::DUCKDB_VX_EXPR_TYPE_CONJUNCTION_AND => {
185+
and_collect(children).vortex_expect("cannot be empty")
186+
}
187+
DUCKDB_VX_EXPR_TYPE::DUCKDB_VX_EXPR_TYPE_CONJUNCTION_OR => {
188+
or_collect(children).vortex_expect("cannot be empty")
189+
}
190+
_ => vortex_bail!("unexpected operator {:?} in bound conjunction", conj.op),
191+
}
192+
}
193+
}))
143194
}
144195

145196
impl TryFrom<DUCKDB_VX_EXPR_TYPE> for Operator {

vortex-duckdb-ext/src/convert/scalar.rs

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,9 @@ use std::sync::Arc;
2222
use vortex::buffer::ByteBuffer;
2323
use vortex::dtype::Nullability::Nullable;
2424
use vortex::dtype::PType::{I32, I64};
25-
use vortex::dtype::datetime::{TIMESTAMP_ID, TemporalMetadata, TimeUnit};
25+
use vortex::dtype::datetime::{DATE_ID, TIME_ID, TIMESTAMP_ID, TemporalMetadata, TimeUnit};
2626
use vortex::dtype::half::f16;
27-
use vortex::dtype::{DType, ExtDType, PType, match_each_native_simd_ptype};
27+
use vortex::dtype::{DType, DecimalDType, ExtDType, PType, match_each_native_simd_ptype};
2828
use vortex::error::{VortexError, VortexExpect, VortexResult, vortex_bail, vortex_err};
2929
use vortex::scalar::{
3030
BinaryScalar, BoolScalar, DecimalScalar, DecimalValue, ExtScalar, PrimitiveScalar, Scalar,
@@ -268,15 +268,15 @@ impl TryFrom<Value> for Scalar {
268268
Arc::new(DType::Primitive(I32, Nullable)),
269269
Some(TemporalMetadata::Timestamp(TimeUnit::S, None).into()),
270270
)),
271-
Scalar::from(value.as_i32()),
271+
Scalar::new(DType::Primitive(I32, Nullable), value.as_i32().into()),
272272
)),
273273
DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP_MS => Ok(Scalar::extension(
274274
Arc::new(ExtDType::new(
275275
TIMESTAMP_ID.clone(),
276276
Arc::new(DType::Primitive(I32, Nullable)),
277277
Some(TemporalMetadata::Timestamp(TimeUnit::Ms, None).into()),
278278
)),
279-
Scalar::from(value.as_i64()),
279+
Scalar::new(DType::Primitive(I32, Nullable), value.as_i32().into()),
280280
)),
281281
// Us
282282
DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP => Ok(Scalar::extension(
@@ -285,16 +285,40 @@ impl TryFrom<Value> for Scalar {
285285
Arc::new(DType::Primitive(I64, Nullable)),
286286
Some(TemporalMetadata::Timestamp(TimeUnit::Us, None).into()),
287287
)),
288-
Scalar::from(value.as_i64()),
288+
Scalar::new(DType::Primitive(I64, Nullable), value.as_i64().into()),
289289
)),
290290
DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP_NS => Ok(Scalar::extension(
291291
Arc::new(ExtDType::new(
292292
TIMESTAMP_ID.clone(),
293293
Arc::new(DType::Primitive(I64, Nullable)),
294294
Some(TemporalMetadata::Timestamp(TimeUnit::Ns, None).into()),
295295
)),
296-
Scalar::from(value.as_i64()),
296+
Scalar::new(DType::Primitive(I64, Nullable), value.as_i64().into()),
297297
)),
298+
DUCKDB_TYPE::DUCKDB_TYPE_DATE => Ok(Scalar::extension(
299+
Arc::new(ExtDType::new(
300+
DATE_ID.clone(),
301+
Arc::new(DType::Primitive(I32, Nullable)),
302+
Some(TemporalMetadata::Date(TimeUnit::D).into()),
303+
)),
304+
Scalar::new(DType::Primitive(I32, Nullable), value.as_i32().into()),
305+
)),
306+
DUCKDB_TYPE::DUCKDB_TYPE_TIME => Ok(Scalar::extension(
307+
Arc::new(ExtDType::new(
308+
TIME_ID.clone(),
309+
Arc::new(DType::Primitive(I64, Nullable)),
310+
Some(TemporalMetadata::Date(TimeUnit::Us).into()),
311+
)),
312+
Scalar::new(DType::Primitive(I64, Nullable), value.as_i64().into()),
313+
)),
314+
DUCKDB_TYPE::DUCKDB_TYPE_DECIMAL => {
315+
let (width, scale) = value.logical_type().as_decimal();
316+
Ok(Scalar::decimal(
317+
DecimalValue::I128(value.as_i128()),
318+
DecimalDType::new(width, scale.try_into()?),
319+
Nullable,
320+
))
321+
}
298322

299323
_ => todo!("cannot convert value into scalar {value:?}"),
300324
}

vortex-duckdb-ext/src/duckdb/expr.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use std::ffi::{CStr, c_void};
22
use std::fmt::{Display, Formatter};
33
use std::ptr;
44

5+
use crate::cpp::duckdb_vx_expr_class;
56
use crate::duckdb::{ScalarFunction, Value};
67
use crate::{cpp, wrapper};
78

@@ -18,6 +19,10 @@ impl Display for Expression {
1819
}
1920

2021
impl Expression {
22+
pub fn as_class_id(&self) -> duckdb_vx_expr_class {
23+
unsafe { cpp::duckdb_vx_expr_get_class(self.as_ptr()) }
24+
}
25+
2126
/// Match the subclass of the expression.
2227
pub fn as_class(&self) -> Option<ExpressionClass> {
2328
Some(
@@ -35,6 +40,22 @@ impl Expression {
3540
};
3641
ExpressionClass::BoundConstant(BoundConstant { value })
3742
}
43+
cpp::DUCKDB_VX_EXPR_CLASS::DUCKDB_VX_EXPR_CLASS_BOUND_CONJUNCTION => {
44+
let mut out = cpp::duckdb_vx_expr_bound_conjunction {
45+
children: ptr::null_mut(),
46+
children_count: 0,
47+
type_: cpp::DUCKDB_VX_EXPR_TYPE::DUCKDB_VX_EXPR_TYPE_INVALID,
48+
};
49+
unsafe { cpp::duckdb_vx_expr_get_bound_conjunction(self.as_ptr(), &mut out) };
50+
51+
let children =
52+
unsafe { std::slice::from_raw_parts(out.children, out.children_count) };
53+
54+
ExpressionClass::BoundConjunction(BoundConjunction {
55+
children,
56+
op: out.type_,
57+
})
58+
}
3859
cpp::DUCKDB_VX_EXPR_CLASS::DUCKDB_VX_EXPR_CLASS_BOUND_COMPARISON => {
3960
let mut out = cpp::duckdb_vx_expr_bound_comparison {
4061
left: ptr::null_mut(),
@@ -115,6 +136,7 @@ pub enum ExpressionClass<'a> {
115136
BoundColumnRef(BoundColumnRef<'a>),
116137
BoundConstant(BoundConstant),
117138
BoundComparison(BoundComparison),
139+
BoundConjunction(BoundConjunction<'a>),
118140
BoundBetween(BoundBetween),
119141
BoundOperator(BoundOperator<'a>),
120142
BoundFunction(BoundFunction<'a>),
@@ -142,6 +164,20 @@ pub struct BoundBetween {
142164
pub upper_inclusive: bool,
143165
}
144166

167+
pub struct BoundConjunction<'a> {
168+
children: &'a [cpp::duckdb_vx_expr],
169+
pub op: cpp::DUCKDB_VX_EXPR_TYPE,
170+
}
171+
172+
impl BoundConjunction<'_> {
173+
/// Returns the children expressions of the bound operator.
174+
pub fn children(&self) -> impl Iterator<Item = Expression> {
175+
self.children
176+
.iter()
177+
.map(|&child| unsafe { Expression::borrow(child) })
178+
}
179+
}
180+
145181
pub struct BoundOperator<'a> {
146182
children: &'a [cpp::duckdb_vx_expr],
147183
pub op: cpp::DUCKDB_VX_EXPR_TYPE,

0 commit comments

Comments
 (0)