Skip to content

Commit 39c6b60

Browse files
committed
sql: clean up parser module
1 parent c2b0f7f commit 39c6b60

File tree

8 files changed

+408
-196
lines changed

8 files changed

+408
-196
lines changed

src/sql/engine/local.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,13 @@ use crate::storage::{self, mvcc};
2020
/// Uses Cow to allow encoding borrowed values but decoding owned values.
2121
#[derive(Debug, Deserialize, Serialize)]
2222
pub enum Key<'a> {
23-
/// A table schema, keyed by table name.
23+
/// A table schema, keyed by table name. The value is a `sql::types::Table`.
2424
Table(Cow<'a, str>),
2525
/// A column index entry, keyed by table name, column name, and index value.
26+
/// The value is a `BTreeSet` of `sql::types::Value` primary key values.
2627
Index(Cow<'a, str>, Cow<'a, str>, Cow<'a, Value>),
27-
/// A table row, keyed by table name and primary key value.
28+
/// A table row, keyed by table name and primary key value. The value is a
29+
/// `sql::types::Row`.
2830
Row(Cow<'a, str>, Cow<'a, Value>),
2931
}
3032

src/sql/engine/session.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ impl<'a, E: Engine<'a>> Session<'a, E> {
3131
pub fn execute(&mut self, statement: &str) -> Result<StatementResult> {
3232
// Parse and execute the statement. Transaction control is handled here,
3333
// other statements are handled by the SQL executor.
34-
Ok(match Parser::new(statement).parse()? {
34+
Ok(match Parser::parse(statement)? {
3535
// BEGIN: starts a new transaction and returns its state.
3636
ast::Statement::Begin { read_only, as_of } => {
3737
if self.txn.is_some() {

src/sql/mod.rs

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ mod tests {
194194

195195
// Output the plan if requested.
196196
if tags.remove("plan") {
197-
let ast = Parser::new(input).parse()?;
197+
let ast = Parser::parse(input)?;
198198
let plan =
199199
session.with_txn(true, |txn| Planner::new(txn).build(ast)?.optimize())?;
200200
writeln!(output, "{plan}")?;
@@ -205,7 +205,7 @@ mod tests {
205205
if tags.contains("plan") {
206206
return Err("using both plan and opt is redundant".into());
207207
}
208-
let ast = Parser::new(input).parse()?;
208+
let ast = Parser::parse(input)?;
209209
let plan = session.with_txn(true, |txn| Planner::new(txn).build(ast))?;
210210
let Plan::Select(mut root) = plan else {
211211
return Err("can only use opt with SELECT plans".into());
@@ -289,11 +289,7 @@ mod tests {
289289
let mut tags = command.tags.clone();
290290

291291
// Parse and build the expression.
292-
let mut parser = Parser::new(input);
293-
let ast = parser.parse_expression()?;
294-
if let Some(next) = parser.lexer.next().transpose()? {
295-
return Err(format!("unconsumed token {next}").into());
296-
}
292+
let ast = Parser::parse_expr(input)?;
297293
let expr = Planner::<Catalog>::build_expression(ast, &Scope::new())?;
298294

299295
// Evaluate the expression.

src/sql/parser/ast.rs

Lines changed: 134 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -1,48 +1,81 @@
11
use std::collections::BTreeMap;
2+
use std::hash::{Hash, Hasher};
23

34
use crate::sql::types::DataType;
45

5-
/// The statement is the root node of the Abstract Syntax Tree, and describes
6-
/// the syntactic structure of a SQL query. It is built from a raw SQL string by
7-
/// the parser, and passed on to the planner which validates it and builds an
6+
/// SQL statements are represented as an Abstract Syntax Tree (AST). The
7+
/// statement is the root node of this tree, and describes the syntactic
8+
/// structure of a SQL statement. It is built from a raw SQL string by the
9+
/// parser, and passed on to the planner which validates it and builds an
810
/// execution plan from it.
911
#[derive(Debug)]
1012
pub enum Statement {
11-
/// Begin a new transaction.
12-
Begin { read_only: bool, as_of: Option<u64> },
13-
/// Commit a transaction.
13+
/// BEGIN: begins a new transaction.
14+
Begin {
15+
/// READ ONLY: if true, begin a read-only transaction.
16+
read_only: bool,
17+
/// AS OF: if given, the MVCC version to read at.
18+
as_of: Option<u64>,
19+
},
20+
/// COMMIT: commits a transaction.
1421
Commit,
15-
/// Roll back a transaction.
22+
/// ROLLBACK: rolls back a transaction.
1623
Rollback,
17-
/// Explain a statement.
24+
/// EXPLAIN: explains a SQL statement's execution plan.
1825
Explain(Box<Statement>),
19-
/// Create a new table.
20-
CreateTable { name: String, columns: Vec<Column> },
21-
/// Drop a table.
22-
DropTable { name: String, if_exists: bool },
23-
/// Delete matching rows.
24-
Delete { table: String, r#where: Option<Expression> },
25-
/// Insert new rows into a table.
26+
/// CREATE TABLE: creates a new table.
27+
CreateTable {
28+
/// The table name.
29+
name: String,
30+
/// Column specifications.
31+
columns: Vec<Column>,
32+
},
33+
/// DROP TABLE: drops a table.
34+
DropTable {
35+
/// The table to drop.
36+
name: String,
37+
/// IF EXISTS: if true, don't error if the table doesn't exist.
38+
if_exists: bool,
39+
},
40+
/// DELETE: deletes rows from a table.
41+
Delete {
42+
/// The table to delete from.
43+
table: String,
44+
/// WHERE: optional condition to match rows to delete.
45+
r#where: Option<Expression>,
46+
},
47+
/// INSERT INTO: inserts new rows into a table.
2648
Insert {
49+
/// Table to insert into.
2750
table: String,
28-
columns: Option<Vec<String>>, // columns given in values, using default for rest
29-
values: Vec<Vec<Expression>>, // rows to insert
51+
/// Columns to insert values into. If None, all columns are used.
52+
columns: Option<Vec<String>>,
53+
/// Row values to insert.
54+
values: Vec<Vec<Expression>>,
3055
},
31-
/// Update rows in a table.
56+
/// UPDATE: updates rows in a table.
3257
Update {
3358
table: String,
3459
set: BTreeMap<String, Option<Expression>>, // column → value, None for default value
3560
r#where: Option<Expression>,
3661
},
37-
/// Select matching rows.
62+
/// SELECT: selects rows, possibly from a table.
3863
Select {
39-
select: Vec<(Expression, Option<String>)>, // optional column aliases
64+
/// Expressions to select, with an optional column alias.
65+
select: Vec<(Expression, Option<String>)>,
66+
/// FROM: tables to select from.
4067
from: Vec<From>,
68+
/// WHERE: optional condition to filter rows.
4169
r#where: Option<Expression>,
70+
/// GROUP BY: expressions to group and aggregate by.
4271
group_by: Vec<Expression>,
72+
/// HAVING: expression to filter groups by.
4373
having: Option<Expression>,
74+
/// ORDER BY: expressions to sort by, with direction.
4475
order_by: Vec<(Expression, Direction)>,
76+
/// OFFSET: row offset to start from.
4577
offset: Option<Expression>,
78+
/// LIMIT: maximum number of rows to return.
4679
limit: Option<Expression>,
4780
},
4881
}
@@ -51,9 +84,23 @@ pub enum Statement {
5184
#[derive(Debug)]
5285
pub enum From {
5386
/// A table.
54-
Table { name: String, alias: Option<String> },
87+
Table {
88+
/// The table name.
89+
name: String,
90+
/// An optional alias for the table.
91+
alias: Option<String>,
92+
},
5593
/// A join of two or more tables (may be nested).
56-
Join { left: Box<From>, right: Box<From>, r#type: JoinType, predicate: Option<Expression> },
94+
Join {
95+
/// The left table to join.
96+
left: Box<From>,
97+
/// The right table to join.
98+
right: Box<From>,
99+
/// The join type.
100+
r#type: JoinType,
101+
/// The join condition. None for a cross join.
102+
predicate: Option<Expression>,
103+
},
57104
}
58105

59106
/// A CREATE TABLE column definition.
@@ -90,13 +137,14 @@ impl JoinType {
90137
}
91138

92139
/// ORDER BY direction.
93-
#[derive(Debug)]
140+
#[derive(Debug, Default)]
94141
pub enum Direction {
142+
#[default]
95143
Ascending,
96144
Descending,
97145
}
98146

99-
/// Expressions. Can be nested.
147+
/// SQL expressions, e.g. `a + 7 > b`. Can be nested.
100148
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
101149
pub enum Expression {
102150
/// All columns, i.e. *.
@@ -121,27 +169,27 @@ pub enum Literal {
121169
String(String),
122170
}
123171

124-
/// To allow using Expressions and Literals in e.g. hashmaps, implement simple
125-
/// equality and hash for all types, including Null and f64::NAN. This is not
126-
/// used for expression evaluation (handled by sql::types::Expression), where
127-
/// these values should not be considered equal to themselves, only in lookups.
128-
impl std::cmp::PartialEq for Literal {
172+
/// To allow using expressions and literals in e.g. hashmaps, implement simple
173+
/// equality by value for all types, including Null and f64::NAN. This only
174+
/// checks that the values are the same, and ignores SQL semantics for e.g. NULL
175+
/// and NaN (which is handled by SQL expression evaluation).
176+
impl PartialEq for Literal {
129177
fn eq(&self, other: &Self) -> bool {
130178
match (self, other) {
179+
(Self::Null, Self::Null) => true,
131180
(Self::Boolean(l), Self::Boolean(r)) => l == r,
132181
(Self::Integer(l), Self::Integer(r)) => l == r,
133-
// Implies NaN == NaN but -NaN != NaN. Similarly with +/-0.0.
134182
(Self::Float(l), Self::Float(r)) => l.to_bits() == r.to_bits(),
135183
(Self::String(l), Self::String(r)) => l == r,
136-
(l, r) => core::mem::discriminant(l) == core::mem::discriminant(r),
184+
(_, _) => false,
137185
}
138186
}
139187
}
140188

141-
impl std::cmp::Eq for Literal {}
189+
impl Eq for Literal {}
142190

143-
impl std::hash::Hash for Literal {
144-
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
191+
impl Hash for Literal {
192+
fn hash<H: Hasher>(&self, state: &mut H) {
145193
core::mem::discriminant(self).hash(state);
146194
match self {
147195
Self::Null => {}
@@ -166,7 +214,7 @@ pub enum Operator {
166214

167215
Equal(Box<Expression>, Box<Expression>), // a = b
168216
GreaterThan(Box<Expression>, Box<Expression>), // a > b
169-
GreaterThanOrEqual(Box<Expression>, Box<Expression>), // a != b
217+
GreaterThanOrEqual(Box<Expression>, Box<Expression>), // a >= b
170218
Is(Box<Expression>, Literal), // IS NULL or IS NAN
171219
LessThan(Box<Expression>, Box<Expression>), // a < b
172220
LessThanOrEqual(Box<Expression>, Box<Expression>), // a <= b
@@ -190,31 +238,33 @@ impl Expression {
190238
/// Halts and returns false if the closure returns false.
191239
pub fn walk(&self, visitor: &mut impl FnMut(&Expression) -> bool) -> bool {
192240
use Operator::*;
241+
193242
if !visitor(self) {
194243
return false;
195244
}
245+
196246
match self {
197-
Self::Operator(Add(lhs, rhs))
198-
| Self::Operator(And(lhs, rhs))
199-
| Self::Operator(Divide(lhs, rhs))
200-
| Self::Operator(Equal(lhs, rhs))
201-
| Self::Operator(Exponentiate(lhs, rhs))
202-
| Self::Operator(GreaterThan(lhs, rhs))
203-
| Self::Operator(GreaterThanOrEqual(lhs, rhs))
204-
| Self::Operator(LessThan(lhs, rhs))
205-
| Self::Operator(LessThanOrEqual(lhs, rhs))
206-
| Self::Operator(Like(lhs, rhs))
207-
| Self::Operator(Multiply(lhs, rhs))
208-
| Self::Operator(NotEqual(lhs, rhs))
209-
| Self::Operator(Or(lhs, rhs))
210-
| Self::Operator(Remainder(lhs, rhs))
211-
| Self::Operator(Subtract(lhs, rhs)) => lhs.walk(visitor) && rhs.walk(visitor),
247+
Self::Operator(op) => match op {
248+
Add(lhs, rhs)
249+
| And(lhs, rhs)
250+
| Divide(lhs, rhs)
251+
| Equal(lhs, rhs)
252+
| Exponentiate(lhs, rhs)
253+
| GreaterThan(lhs, rhs)
254+
| GreaterThanOrEqual(lhs, rhs)
255+
| LessThan(lhs, rhs)
256+
| LessThanOrEqual(lhs, rhs)
257+
| Like(lhs, rhs)
258+
| Multiply(lhs, rhs)
259+
| NotEqual(lhs, rhs)
260+
| Or(lhs, rhs)
261+
| Remainder(lhs, rhs)
262+
| Subtract(lhs, rhs) => lhs.walk(visitor) && rhs.walk(visitor),
212263

213-
Self::Operator(Factorial(expr))
214-
| Self::Operator(Identity(expr))
215-
| Self::Operator(Is(expr, _))
216-
| Self::Operator(Negate(expr))
217-
| Self::Operator(Not(expr)) => expr.walk(visitor),
264+
Factorial(expr) | Identity(expr) | Is(expr, _) | Negate(expr) | Not(expr) => {
265+
expr.walk(visitor)
266+
}
267+
},
218268

219269
Self::Function(_, exprs) => exprs.iter().any(|expr| expr.walk(visitor)),
220270

@@ -230,39 +280,40 @@ impl Expression {
230280

231281
/// Finds and collects expressions for which the given closure returns true,
232282
/// adding them to c. Does not recurse into matching expressions.
233-
pub fn collect(&self, visitor: &impl Fn(&Expression) -> bool, c: &mut Vec<Expression>) {
283+
pub fn collect(&self, visitor: &impl Fn(&Expression) -> bool, exprs: &mut Vec<Expression>) {
284+
use Operator::*;
285+
234286
if visitor(self) {
235-
c.push(self.clone());
287+
exprs.push(self.clone());
236288
return;
237289
}
238-
use Operator::*;
239-
match self {
240-
Self::Operator(Add(lhs, rhs))
241-
| Self::Operator(And(lhs, rhs))
242-
| Self::Operator(Divide(lhs, rhs))
243-
| Self::Operator(Equal(lhs, rhs))
244-
| Self::Operator(Exponentiate(lhs, rhs))
245-
| Self::Operator(GreaterThan(lhs, rhs))
246-
| Self::Operator(GreaterThanOrEqual(lhs, rhs))
247-
| Self::Operator(LessThan(lhs, rhs))
248-
| Self::Operator(LessThanOrEqual(lhs, rhs))
249-
| Self::Operator(Like(lhs, rhs))
250-
| Self::Operator(Multiply(lhs, rhs))
251-
| Self::Operator(NotEqual(lhs, rhs))
252-
| Self::Operator(Or(lhs, rhs))
253-
| Self::Operator(Remainder(lhs, rhs))
254-
| Self::Operator(Subtract(lhs, rhs)) => {
255-
lhs.collect(visitor, c);
256-
rhs.collect(visitor, c);
257-
}
258290

259-
Self::Operator(Factorial(expr))
260-
| Self::Operator(Identity(expr))
261-
| Self::Operator(Is(expr, _))
262-
| Self::Operator(Negate(expr))
263-
| Self::Operator(Not(expr)) => expr.collect(visitor, c),
291+
match self {
292+
Self::Operator(op) => match op {
293+
Add(lhs, rhs)
294+
| And(lhs, rhs)
295+
| Divide(lhs, rhs)
296+
| Equal(lhs, rhs)
297+
| Exponentiate(lhs, rhs)
298+
| GreaterThan(lhs, rhs)
299+
| GreaterThanOrEqual(lhs, rhs)
300+
| LessThan(lhs, rhs)
301+
| LessThanOrEqual(lhs, rhs)
302+
| Like(lhs, rhs)
303+
| Multiply(lhs, rhs)
304+
| NotEqual(lhs, rhs)
305+
| Or(lhs, rhs)
306+
| Remainder(lhs, rhs)
307+
| Subtract(lhs, rhs) => {
308+
lhs.collect(visitor, exprs);
309+
rhs.collect(visitor, exprs);
310+
}
311+
Factorial(expr) | Identity(expr) | Is(expr, _) | Negate(expr) | Not(expr) => {
312+
expr.collect(visitor, exprs);
313+
}
314+
},
264315

265-
Self::Function(_, exprs) => exprs.iter().for_each(|expr| expr.collect(visitor, c)),
316+
Self::Function(_, args) => args.iter().for_each(|arg| arg.collect(visitor, exprs)),
266317

267318
Self::All | Self::Column(_, _) | Self::Literal(_) => {}
268319
}

0 commit comments

Comments
 (0)