Skip to content

Commit c10d553

Browse files
authored
feat: conjunctive & negative normal forms (#1857)
NNF: `Not` only appears around "terminal" nodes (i.e. nodes other than the binary expressions). CNF: A conjunction of disjunctions of NNF expressions.
1 parent 5696233 commit c10d553

File tree

9 files changed

+451
-21
lines changed

9 files changed

+451
-21
lines changed

vortex-expr/src/forms/cnf.rs

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
use vortex_error::VortexResult;
2+
3+
use super::nnf::nnf;
4+
use crate::traversal::{Node as _, NodeVisitor, TraversalOrder};
5+
use crate::{BinaryExpr, ExprRef, Operator};
6+
7+
/// Return an equivalent expression in Conjunctive Normal Form (CNF).
8+
///
9+
/// A CNF expression is a vector of vectors. The outer vector is a conjunction. The inner vectors
10+
/// are disjunctions. Neither [Operator::And] nor [Operator::Or] may appear in the
11+
/// disjunctions. Moreover, each disjunction in a CNF expression must be in Negative Normal Form.
12+
///
13+
/// # Examples
14+
///
15+
/// All the NNF examples also apply to CNF, for example double negation is removed entirely:
16+
///
17+
/// ```
18+
/// use vortex_expr::{not, col};
19+
/// use vortex_expr::forms::cnf::cnf;
20+
///
21+
/// let double_negation = not(not(col("a")));
22+
/// let cnfed = cnf(double_negation).unwrap();
23+
/// assert_eq!(cnfed, vec![vec![col("a")]]);
24+
/// ```
25+
///
26+
/// Unlike NNF, CNF, lifts conjunctions to the top-level and distributions disjunctions such that
27+
/// there is at most one disjunction for each conjunction operand:
28+
///
29+
/// ```
30+
/// use vortex_expr::{not, col, or, and};
31+
/// use vortex_expr::forms::cnf::cnf;
32+
///
33+
/// assert_eq!(
34+
/// cnf(
35+
/// or(
36+
/// or(
37+
/// and(col("a"), col("b")),
38+
/// col("c")
39+
/// ),
40+
/// col("d")
41+
/// )
42+
/// ).unwrap(),
43+
/// vec![
44+
/// vec![col("a"), col("c"), col("d")],
45+
/// vec![col("b"), col("c"), col("d")]
46+
/// ]
47+
/// );
48+
/// ```
49+
///
50+
/// ```
51+
/// use vortex_expr::{not, col, or, and};
52+
/// use vortex_expr::forms::cnf::cnf;
53+
///
54+
/// assert_eq!(
55+
/// cnf(
56+
/// or(
57+
/// and(col("a"), col("b")),
58+
/// col("c"),
59+
/// )
60+
/// ).unwrap(),
61+
/// vec![
62+
/// vec![col("a"), col("c")],
63+
/// vec![col("b"), col("c")],
64+
/// ]
65+
/// );
66+
/// ```
67+
///
68+
/// Vortex extends the CNF definition to any Boolean-valued expression, even ones with non-Boolean
69+
/// parameters:
70+
///
71+
/// ```
72+
/// use vortex_expr::{not, col, or, and, gt_eq, lit, not_eq, lt, eq};
73+
/// use vortex_expr::forms::cnf::cnf;
74+
/// use itertools::Itertools;
75+
///
76+
/// assert_eq!(
77+
/// cnf(
78+
/// or(
79+
/// and(
80+
/// gt_eq(col("earnings"), lit(50_000)),
81+
/// not_eq(col("role"), lit("Manager"))
82+
/// ),
83+
/// col("special_flag")
84+
/// ),
85+
/// ).unwrap(),
86+
/// vec![
87+
/// vec![
88+
/// gt_eq(col("earnings"), lit(50_000)),
89+
/// col("special_flag")
90+
/// ],
91+
/// vec![
92+
/// not_eq(col("role"), lit("Manager")),
93+
/// col("special_flag")
94+
/// ]
95+
/// ]
96+
/// );
97+
/// ```
98+
///
99+
/// ```
100+
/// use vortex_expr::{not, col, or, and, gt_eq, lit, not_eq, lt, eq};
101+
/// use vortex_expr::forms::cnf::cnf;
102+
/// use itertools::Itertools;
103+
///
104+
/// assert_eq!(
105+
/// cnf(
106+
/// or(
107+
/// or(
108+
/// and(
109+
/// gt_eq(col("earnings"), lit(50_000)),
110+
/// not_eq(col("role"), lit("Manager"))
111+
/// ),
112+
/// col("special_flag")
113+
/// ),
114+
/// and(
115+
/// lt(col("tenure"), lit(5)),
116+
/// eq(col("role"), lit("Engineer"))
117+
/// ),
118+
/// )
119+
/// ).unwrap(),
120+
/// vec![
121+
/// vec![
122+
/// gt_eq(col("earnings"), lit(50_000)),
123+
/// col("special_flag"),
124+
/// lt(col("tenure"), lit(5)),
125+
/// ],
126+
/// vec![
127+
/// gt_eq(col("earnings"), lit(50_000)),
128+
/// col("special_flag"),
129+
/// eq(col("role"), lit("Engineer")),
130+
/// ],
131+
/// vec![
132+
/// not_eq(col("role"), lit("Manager")),
133+
/// col("special_flag"),
134+
/// lt(col("tenure"), lit(5)),
135+
/// ],
136+
/// vec![
137+
/// not_eq(col("role"), lit("Manager")),
138+
/// col("special_flag"),
139+
/// eq(col("role"), lit("Engineer")),
140+
/// ],
141+
/// ]
142+
/// );
143+
/// ```
144+
///
145+
pub fn cnf(expr: ExprRef) -> VortexResult<Vec<Vec<ExprRef>>> {
146+
let nnf = nnf(expr)?;
147+
148+
let mut visitor = CNFVisitor::default();
149+
nnf.accept(&mut visitor)?;
150+
Ok(visitor.finish())
151+
}
152+
153+
#[derive(Default)]
154+
struct CNFVisitor {
155+
conjuncts_of_disjuncts: Vec<Vec<ExprRef>>,
156+
}
157+
158+
impl CNFVisitor {
159+
fn finish(self) -> Vec<Vec<ExprRef>> {
160+
self.conjuncts_of_disjuncts
161+
}
162+
}
163+
164+
impl NodeVisitor<'_> for CNFVisitor {
165+
type NodeTy = ExprRef;
166+
167+
fn visit_down(&mut self, node: &ExprRef) -> VortexResult<TraversalOrder> {
168+
if let Some(binary_expr) = node.as_any().downcast_ref::<BinaryExpr>() {
169+
match binary_expr.op() {
170+
Operator::And => return Ok(TraversalOrder::Continue),
171+
Operator::Or => {
172+
let mut visitor = CNFVisitor::default();
173+
binary_expr.lhs().accept(&mut visitor)?;
174+
let lhs_conjuncts = visitor.finish();
175+
176+
let mut visitor = CNFVisitor::default();
177+
binary_expr.rhs().accept(&mut visitor)?;
178+
let rhs_conjuncts = visitor.finish();
179+
180+
self.conjuncts_of_disjuncts
181+
.extend(lhs_conjuncts.iter().flat_map(|lhs_disjunct| {
182+
rhs_conjuncts.iter().map(|rhs_disjunct| {
183+
let mut lhs_copy = lhs_disjunct.clone();
184+
lhs_copy.extend(rhs_disjunct.iter().cloned());
185+
lhs_copy
186+
})
187+
}));
188+
189+
return Ok(TraversalOrder::Skip);
190+
}
191+
_ => {}
192+
}
193+
}
194+
// Anything other than And and Or are terminals from the perspective of CNF
195+
self.conjuncts_of_disjuncts.push(vec![node.clone()]);
196+
Ok(TraversalOrder::Skip)
197+
}
198+
}

vortex-expr/src/forms/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
pub mod cnf;
2+
pub mod nnf;

vortex-expr/src/forms/nnf.rs

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
use vortex_error::VortexResult;
2+
3+
use crate::traversal::{FoldChildren, FoldDown, FoldUp, Folder, Node as _};
4+
use crate::{not, BinaryExpr, ExprRef, Not, Operator};
5+
6+
/// Return an equivalent expression in Negative Normal Form (NNF).
7+
///
8+
/// In NNF, [Not] expressions may only contain terminal nodes such as [Literal](crate::Literal) or
9+
/// [Column](crate::Column). They *may not* contain [BinaryExpr], [Not], etc.
10+
///
11+
/// # Examples
12+
///
13+
/// Double negation is removed entirely:
14+
///
15+
/// ```
16+
/// use vortex_expr::{not, col};
17+
/// use vortex_expr::forms::nnf::nnf;
18+
///
19+
/// let double_negation = not(not(col("a")));
20+
/// let nnfed = nnf(double_negation).unwrap();
21+
/// assert_eq!(&nnfed, &col("a"));
22+
/// ```
23+
///
24+
/// Triple negation becomes single negation:
25+
///
26+
/// ```
27+
/// use vortex_expr::{not, col};
28+
/// use vortex_expr::forms::nnf::nnf;
29+
///
30+
/// let triple_negation = not(not(not(col("a"))));
31+
/// let nnfed = nnf(triple_negation).unwrap();
32+
/// assert_eq!(&nnfed, &not(col("a")));
33+
/// ```
34+
///
35+
/// Negation at a high-level is pushed to the leaves, likely increasing the total number nodes:
36+
///
37+
/// ```
38+
/// use vortex_expr::{not, col, and, or};
39+
/// use vortex_expr::forms::nnf::nnf;
40+
///
41+
/// assert_eq!(
42+
/// &nnf(not(and(col("a"), col("b")))).unwrap(),
43+
/// &or(not(col("a")), not(col("b")))
44+
/// );
45+
/// ```
46+
///
47+
/// In Vortex, NNF is extended beyond simple Boolean operators to any Boolean-valued operator:
48+
///
49+
/// ```
50+
/// use vortex_expr::{not, col, and, or, lt, lit, gt_eq};
51+
/// use vortex_expr::forms::nnf::nnf;
52+
///
53+
/// assert_eq!(
54+
/// &nnf(not(and(gt_eq(col("a"), lit(3)), col("b")))).unwrap(),
55+
/// &or(lt(col("a"), lit(3)), not(col("b")))
56+
/// );
57+
/// ```
58+
pub fn nnf(expr: ExprRef) -> VortexResult<ExprRef> {
59+
let mut visitor = NNFVisitor::default();
60+
Ok(expr.transform_with_context(&mut visitor, false)?.result())
61+
}
62+
63+
#[derive(Default)]
64+
struct NNFVisitor {}
65+
66+
impl Folder for NNFVisitor {
67+
type NodeTy = ExprRef;
68+
type Out = ExprRef;
69+
type Context = bool;
70+
71+
fn visit_down(
72+
&mut self,
73+
node: &ExprRef,
74+
negating: bool,
75+
) -> VortexResult<FoldDown<ExprRef, bool>> {
76+
let node_any = node.as_any();
77+
if node_any.downcast_ref::<Not>().is_some() {
78+
return Ok(FoldDown::Continue(!negating));
79+
} else if let Some(binary_expr) = node_any.downcast_ref::<BinaryExpr>() {
80+
match binary_expr.op() {
81+
Operator::And | Operator::Or => {
82+
return Ok(FoldDown::Continue(negating));
83+
}
84+
_ => {}
85+
}
86+
}
87+
88+
Ok(FoldDown::SkipChildren)
89+
}
90+
91+
fn visit_up(
92+
&mut self,
93+
node: ExprRef,
94+
negating: bool,
95+
new_children: FoldChildren<ExprRef>,
96+
) -> VortexResult<FoldUp<ExprRef>> {
97+
let node_any = node.as_any();
98+
let new_node = if node_any.downcast_ref::<Not>().is_some() {
99+
let FoldChildren::Children(mut new_children) = new_children else {
100+
unreachable!();
101+
};
102+
debug_assert_eq!(new_children.len(), 1);
103+
104+
new_children.remove(0)
105+
} else if let Some(binary_expr) = node_any.downcast_ref::<BinaryExpr>() {
106+
if !negating {
107+
node
108+
} else {
109+
let new_op = match binary_expr.op() {
110+
Operator::Eq => Operator::NotEq,
111+
Operator::NotEq => Operator::Eq,
112+
Operator::Gt => Operator::Lte,
113+
Operator::Gte => Operator::Lt,
114+
Operator::Lt => Operator::Gte,
115+
Operator::Lte => Operator::Gt,
116+
Operator::And => Operator::Or,
117+
Operator::Or => Operator::And,
118+
};
119+
let (lhs, rhs) = match binary_expr.op() {
120+
Operator::Or | Operator::And => {
121+
let FoldChildren::Children(mut negated_children) = new_children else {
122+
unreachable!();
123+
};
124+
debug_assert_eq!(negated_children.len(), 2);
125+
let rhs = negated_children.remove(1);
126+
let lhs = negated_children.remove(0);
127+
(lhs, rhs)
128+
}
129+
_ => {
130+
let FoldChildren::Skipped = new_children else {
131+
unreachable!();
132+
};
133+
(binary_expr.lhs().clone(), binary_expr.rhs().clone())
134+
}
135+
};
136+
BinaryExpr::new_expr(lhs, new_op, rhs)
137+
}
138+
} else {
139+
let FoldChildren::Skipped = new_children else {
140+
unreachable!();
141+
};
142+
143+
if negating {
144+
not(node)
145+
} else {
146+
node
147+
}
148+
};
149+
Ok(FoldUp::Continue(new_node))
150+
}
151+
}

vortex-expr/src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use std::sync::Arc;
55
mod binary;
66
mod column;
77
pub mod datafusion;
8+
pub mod forms;
89
mod get_item;
910
mod identity;
1011
mod like;
@@ -183,7 +184,7 @@ mod tests {
183184
"(($col1 < $col2) or ($col1 != $col2))"
184185
);
185186

186-
assert_eq!(Not::new_expr(col1.clone()).to_string(), "!$col1");
187+
assert_eq!(not(col1.clone()).to_string(), "!$col1");
187188

188189
assert_eq!(
189190
Select::include_expr(vec![Field::from("col1")], ident()).to_string(),

vortex-expr/src/like.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -100,16 +100,14 @@ impl Eq for Like {}
100100

101101
#[cfg(test)]
102102
mod tests {
103-
use std::sync::Arc;
104-
105103
use vortex_array::array::BoolArray;
106104
use vortex_array::IntoArrayVariant;
107105

108-
use crate::{Identity, Not};
106+
use crate::{ident, not};
109107

110108
#[test]
111109
fn invert_booleans() {
112-
let not_expr = Not::new_expr(Arc::new(Identity));
110+
let not_expr = not(ident());
113111
let bools = BoolArray::from_iter([false, true, false, false, true, true]);
114112
assert_eq!(
115113
not_expr

0 commit comments

Comments
 (0)