Skip to content

Commit a533ce6

Browse files
committed
Partial migration of filter
1 parent 21d01ae commit a533ce6

File tree

10 files changed

+686
-5
lines changed

10 files changed

+686
-5
lines changed

optd-cost-model/src/common/nodes.rs

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ pub struct PredicateNode {
7777
/// A generic predicate node type
7878
pub typ: PredicateType,
7979
/// Child predicate nodes, always materialized
80-
pub children: Vec<PredicateNode>,
80+
pub children: Vec<ArcPredicateNode>,
8181
/// Data associated with the predicate, if any
8282
pub data: Option<Value>,
8383
}
@@ -94,3 +94,28 @@ impl std::fmt::Display for PredicateNode {
9494
write!(f, ")")
9595
}
9696
}
97+
98+
impl PredicateNode {
99+
pub fn child(&self, idx: usize) -> ArcPredicateNode {
100+
self.children[idx].clone()
101+
}
102+
103+
pub fn unwrap_data(&self) -> Value {
104+
self.data.clone().unwrap()
105+
}
106+
}
107+
pub trait ReprPredicateNode: 'static + Clone {
108+
fn into_pred_node(self) -> ArcPredicateNode;
109+
110+
fn from_pred_node(pred_node: ArcPredicateNode) -> Option<Self>;
111+
}
112+
113+
/// Identity implementation: a generic node is trivially its own representation.
impl ReprPredicateNode for ArcPredicateNode {
    /// Returns the node unchanged.
    fn into_pred_node(self) -> ArcPredicateNode {
        let node = self;
        node
    }

    /// Always succeeds, wrapping the given node in `Some`.
    fn from_pred_node(pred_node: ArcPredicateNode) -> Option<Self> {
        Option::Some(pred_node)
    }
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
use crate::common::{
2+
nodes::{ArcPredicateNode, PredicateNode, PredicateType, ReprPredicateNode},
3+
values::Value,
4+
};
5+
6+
#[derive(Clone, Debug)]
7+
pub struct AttributeRefPred(pub ArcPredicateNode);
8+
9+
impl AttributeRefPred {
10+
/// Creates a new `ColumnRef` expression.
11+
pub fn new(column_idx: usize) -> AttributeRefPred {
12+
// this conversion is always safe since usize is at most u64
13+
let u64_column_idx = column_idx as u64;
14+
AttributeRefPred(
15+
PredicateNode {
16+
typ: PredicateType::AttributeRef,
17+
children: vec![],
18+
data: Some(Value::UInt64(u64_column_idx)),
19+
}
20+
.into(),
21+
)
22+
}
23+
24+
fn get_data_usize(&self) -> usize {
25+
self.0.data.as_ref().unwrap().as_u64() as usize
26+
}
27+
28+
/// Gets the column index.
29+
pub fn index(&self) -> usize {
30+
self.get_data_usize()
31+
}
32+
}
33+
34+
impl ReprPredicateNode for AttributeRefPred {
35+
fn into_pred_node(self) -> ArcPredicateNode {
36+
self.0
37+
}
38+
39+
fn from_pred_node(pred_node: ArcPredicateNode) -> Option<Self> {
40+
if pred_node.typ != PredicateType::AttributeRef {
41+
return None;
42+
}
43+
Some(Self(pred_node))
44+
}
45+
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
use arrow_schema::DataType;
2+
3+
use crate::common::nodes::{ArcPredicateNode, PredicateNode, PredicateType, ReprPredicateNode};
4+
5+
use super::data_type_pred::DataTypePred;
6+
7+
#[derive(Clone, Debug)]
8+
pub struct CastPred(pub ArcPredicateNode);
9+
10+
impl CastPred {
11+
pub fn new(child: ArcPredicateNode, cast_to: DataType) -> Self {
12+
CastPred(
13+
PredicateNode {
14+
typ: PredicateType::Cast,
15+
children: vec![child, DataTypePred::new(cast_to).into_pred_node()],
16+
data: None,
17+
}
18+
.into(),
19+
)
20+
}
21+
22+
pub fn child(&self) -> ArcPredicateNode {
23+
self.0.child(0)
24+
}
25+
26+
pub fn cast_to(&self) -> DataType {
27+
DataTypePred::from_pred_node(self.0.child(1))
28+
.unwrap()
29+
.data_type()
30+
}
31+
}
32+
33+
impl ReprPredicateNode for CastPred {
34+
fn into_pred_node(self) -> ArcPredicateNode {
35+
self.0
36+
}
37+
38+
fn from_pred_node(pred_node: ArcPredicateNode) -> Option<Self> {
39+
if !matches!(pred_node.typ, PredicateType::Cast) {
40+
return None;
41+
}
42+
Some(Self(pred_node))
43+
}
44+
}

optd-cost-model/src/common/predicates/constant_pred.rs

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
1+
use std::sync::Arc;
2+
3+
use arrow_schema::{DataType, IntervalUnit};
14
use serde::{Deserialize, Serialize};
25

6+
use crate::common::{
7+
nodes::{ArcPredicateNode, PredicateNode, PredicateType, ReprPredicateNode},
8+
values::{SerializableOrderedF64, Value},
9+
};
10+
311
/// TODO: documentation
412
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug, Serialize, Deserialize)]
513
pub enum ConstantType {
@@ -19,3 +27,184 @@ pub enum ConstantType {
1927
Decimal,
2028
Binary,
2129
}
30+
31+
impl ConstantType {
32+
pub fn get_data_type_from_value(value: &Value) -> Self {
33+
match value {
34+
Value::Bool(_) => ConstantType::Bool,
35+
Value::String(_) => ConstantType::Utf8String,
36+
Value::UInt8(_) => ConstantType::UInt8,
37+
Value::UInt16(_) => ConstantType::UInt16,
38+
Value::UInt32(_) => ConstantType::UInt32,
39+
Value::UInt64(_) => ConstantType::UInt64,
40+
Value::Int8(_) => ConstantType::Int8,
41+
Value::Int16(_) => ConstantType::Int16,
42+
Value::Int32(_) => ConstantType::Int32,
43+
Value::Int64(_) => ConstantType::Int64,
44+
Value::Float(_) => ConstantType::Float64,
45+
Value::Date32(_) => ConstantType::Date,
46+
_ => unimplemented!("get_data_type_from_value() not implemented for value {value}"),
47+
}
48+
}
49+
50+
// TODO: current DataType and ConstantType are not 1 to 1 mapping
51+
// optd schema stores constantType from data type in catalog.get
52+
// for decimal128, the precision is lost
53+
pub fn from_data_type(data_type: DataType) -> Self {
54+
match data_type {
55+
DataType::Binary => ConstantType::Binary,
56+
DataType::Boolean => ConstantType::Bool,
57+
DataType::UInt8 => ConstantType::UInt8,
58+
DataType::UInt16 => ConstantType::UInt16,
59+
DataType::UInt32 => ConstantType::UInt32,
60+
DataType::UInt64 => ConstantType::UInt64,
61+
DataType::Int8 => ConstantType::Int8,
62+
DataType::Int16 => ConstantType::Int16,
63+
DataType::Int32 => ConstantType::Int32,
64+
DataType::Int64 => ConstantType::Int64,
65+
DataType::Float64 => ConstantType::Float64,
66+
DataType::Date32 => ConstantType::Date,
67+
DataType::Interval(IntervalUnit::MonthDayNano) => ConstantType::IntervalMonthDateNano,
68+
DataType::Utf8 => ConstantType::Utf8String,
69+
DataType::Decimal128(_, _) => ConstantType::Decimal,
70+
_ => unimplemented!("no conversion to ConstantType for DataType {data_type}"),
71+
}
72+
}
73+
74+
pub fn into_data_type(&self) -> DataType {
75+
match self {
76+
ConstantType::Binary => DataType::Binary,
77+
ConstantType::Bool => DataType::Boolean,
78+
ConstantType::UInt8 => DataType::UInt8,
79+
ConstantType::UInt16 => DataType::UInt16,
80+
ConstantType::UInt32 => DataType::UInt32,
81+
ConstantType::UInt64 => DataType::UInt64,
82+
ConstantType::Int8 => DataType::Int8,
83+
ConstantType::Int16 => DataType::Int16,
84+
ConstantType::Int32 => DataType::Int32,
85+
ConstantType::Int64 => DataType::Int64,
86+
ConstantType::Float64 => DataType::Float64,
87+
ConstantType::Date => DataType::Date32,
88+
ConstantType::IntervalMonthDateNano => DataType::Interval(IntervalUnit::MonthDayNano),
89+
ConstantType::Decimal => DataType::Float64,
90+
ConstantType::Utf8String => DataType::Utf8,
91+
}
92+
}
93+
}
94+
95+
#[derive(Clone, Debug)]
96+
pub struct ConstantPred(pub ArcPredicateNode);
97+
98+
impl ConstantPred {
99+
pub fn new(value: Value) -> Self {
100+
let typ = ConstantType::get_data_type_from_value(&value);
101+
Self::new_with_type(value, typ)
102+
}
103+
104+
pub fn new_with_type(value: Value, typ: ConstantType) -> Self {
105+
ConstantPred(
106+
PredicateNode {
107+
typ: PredicateType::Constant(typ),
108+
children: vec![],
109+
data: Some(value),
110+
}
111+
.into(),
112+
)
113+
}
114+
115+
pub fn bool(value: bool) -> Self {
116+
Self::new_with_type(Value::Bool(value), ConstantType::Bool)
117+
}
118+
119+
pub fn string(value: impl AsRef<str>) -> Self {
120+
Self::new_with_type(
121+
Value::String(value.as_ref().into()),
122+
ConstantType::Utf8String,
123+
)
124+
}
125+
126+
pub fn uint8(value: u8) -> Self {
127+
Self::new_with_type(Value::UInt8(value), ConstantType::UInt8)
128+
}
129+
130+
pub fn uint16(value: u16) -> Self {
131+
Self::new_with_type(Value::UInt16(value), ConstantType::UInt16)
132+
}
133+
134+
pub fn uint32(value: u32) -> Self {
135+
Self::new_with_type(Value::UInt32(value), ConstantType::UInt32)
136+
}
137+
138+
pub fn uint64(value: u64) -> Self {
139+
Self::new_with_type(Value::UInt64(value), ConstantType::UInt64)
140+
}
141+
142+
pub fn int8(value: i8) -> Self {
143+
Self::new_with_type(Value::Int8(value), ConstantType::Int8)
144+
}
145+
146+
pub fn int16(value: i16) -> Self {
147+
Self::new_with_type(Value::Int16(value), ConstantType::Int16)
148+
}
149+
150+
pub fn int32(value: i32) -> Self {
151+
Self::new_with_type(Value::Int32(value), ConstantType::Int32)
152+
}
153+
154+
pub fn int64(value: i64) -> Self {
155+
Self::new_with_type(Value::Int64(value), ConstantType::Int64)
156+
}
157+
158+
pub fn interval_month_day_nano(value: i128) -> Self {
159+
Self::new_with_type(Value::Int128(value), ConstantType::IntervalMonthDateNano)
160+
}
161+
162+
pub fn float64(value: f64) -> Self {
163+
Self::new_with_type(
164+
Value::Float(SerializableOrderedF64(value.into())),
165+
ConstantType::Float64,
166+
)
167+
}
168+
169+
pub fn date(value: i64) -> Self {
170+
Self::new_with_type(Value::Int64(value), ConstantType::Date)
171+
}
172+
173+
pub fn decimal(value: f64) -> Self {
174+
Self::new_with_type(
175+
Value::Float(SerializableOrderedF64(value.into())),
176+
ConstantType::Decimal,
177+
)
178+
}
179+
180+
pub fn serialized(value: Arc<[u8]>) -> Self {
181+
Self::new_with_type(Value::Serialized(value), ConstantType::Binary)
182+
}
183+
184+
/// Gets the constant value.
185+
pub fn value(&self) -> Value {
186+
self.0.data.clone().unwrap()
187+
}
188+
189+
pub fn constant_type(&self) -> ConstantType {
190+
if let PredicateType::Constant(typ) = self.0.typ {
191+
typ
192+
} else {
193+
panic!("not a constant")
194+
}
195+
}
196+
}
197+
198+
impl ReprPredicateNode for ConstantPred {
199+
fn into_pred_node(self) -> ArcPredicateNode {
200+
self.0
201+
}
202+
203+
fn from_pred_node(rel_node: ArcPredicateNode) -> Option<Self> {
204+
if let PredicateType::Constant(_) = rel_node.typ {
205+
Some(Self(rel_node))
206+
} else {
207+
None
208+
}
209+
}
210+
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
use arrow_schema::DataType;
2+
3+
use crate::common::nodes::{ArcPredicateNode, PredicateNode, PredicateType, ReprPredicateNode};
4+
5+
#[derive(Clone, Debug)]
6+
pub struct DataTypePred(pub ArcPredicateNode);
7+
8+
impl DataTypePred {
9+
pub fn new(typ: DataType) -> Self {
10+
DataTypePred(
11+
PredicateNode {
12+
typ: PredicateType::DataType(typ),
13+
children: vec![],
14+
data: None,
15+
}
16+
.into(),
17+
)
18+
}
19+
20+
pub fn data_type(&self) -> DataType {
21+
if let PredicateType::DataType(ref data_type) = self.0.typ {
22+
data_type.clone()
23+
} else {
24+
panic!("not a data type")
25+
}
26+
}
27+
}
28+
29+
impl ReprPredicateNode for DataTypePred {
30+
fn into_pred_node(self) -> ArcPredicateNode {
31+
self.0
32+
}
33+
34+
fn from_pred_node(pred_node: ArcPredicateNode) -> Option<Self> {
35+
if !matches!(pred_node.typ, PredicateType::DataType(_)) {
36+
return None;
37+
}
38+
Some(Self(pred_node))
39+
}
40+
}

optd-cost-model/src/common/predicates/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1+
pub mod attr_ref_pred;
12
pub mod bin_op_pred;
3+
pub mod cast_pred;
24
pub mod constant_pred;
5+
pub mod data_type_pred;
36
pub mod func_pred;
47
pub mod log_op_pred;
58
pub mod sort_order_pred;

0 commit comments

Comments
 (0)