
SQL Parsing

We finally arrive at SQL. The SQL parser is the first stage in processing SQL queries and statements, located in the sql::parser module.

The SQL parser's job is to take a raw SQL string and turn it into a structured form that's more convenient to work with. In doing so, it will validate that the string is in fact valid SQL syntax. However, it doesn't know if the SQL statement actually makes sense -- it has no idea which tables or columns exist, what their data types are, and so on. That's the job of the planner, which we'll look at later.

For example, let's say the parser is given the following SQL query:

SELECT name, price, price * 25 / 100 AS vat
FROM products JOIN categories ON products.category_id = categories.id
WHERE categories.code = 'BLURAY' AND stock > 0
ORDER BY price DESC
LIMIT 10

It will generate a structure that looks something like this (in simplified syntax):

// A SELECT statement.
Statement::Select {
    // SELECT name, price, price * 25 / 100 AS vat
    select: [
        (Column("name"), None),
        (Column("price"), None),
        (
            Divide(
                Multiply(Column("price"), Integer(25)),
                Integer(100)
            ),
            Some("vat"),
        ),
    ]

    // FROM products JOIN categories ON products.category_id = categories.id
    from: [
        Join {
            left: Table("products"),
            right: Table("categories"),
            type: Inner,
            predicate: Some(
                Equal(
                    Column("products.category_id"),
                    Column("categories.id"),
                )
            )
        }
    ]

    // WHERE categories.code = 'BLURAY' AND stock > 0
    where: Some(
        And(
            Equal(
                Column("categories.code"),
                String("BLURAY"),
            ),
            GreaterThan(
                Column("stock"),
                Integer(0),
            )
        )
    )

    // ORDER BY price DESC
    order: [
        (Column("price"), Descending),
    ]

    // LIMIT 10
    limit: Some(Integer(10))
}

Let's have a look at how this happens.

Lexer

We begin with the sql::parser::Lexer, which takes the raw SQL string and performs lexical analysis to convert it into a sequence of tokens. These tokens are things like number, string, identifier, SQL keyword, and so on.

This preprocessing is useful to deal with some of the "noise" of SQL text, such as whitespace, string quotes, identifier normalization, and so on. It also specifies which symbols and keywords are valid in our SQL queries. This makes the parser's life a lot easier.

The lexer doesn't care about SQL structure at all, only that the individual pieces (tokens) of a string are well-formed. For example, the following input string:

'foo' ) 3.14 SELECT + x

Will result in these tokens:

String("foo") CloseParen Number("3.14") Keyword(Select) Plus Ident("x")

Tokens and keywords are represented by the sql::parser::Token and sql::parser::Keyword enums respectively:

/// A lexical token.
///
/// These carry owned String clones rather than &str references into the
/// original input string, because the lexer may need to modify the string (e.g.
/// to parse escaped quotes in strings, lowercase identifiers, etc). We could
/// use `Cow<str>` to avoid this in the common case, but we'll end up using
/// owned strings in the final parsed AST anyway to avoid propagating these
/// lifetimes throughout the entire SQL execution engine, so we keep it simple.
#[derive(Clone, Debug, PartialEq)]
pub enum Token {
    /// A numeric string, with digits, decimal points, and/or exponents. Leading
    /// signs (e.g. -) are separate tokens.
    Number(String),
    /// A Unicode string, with quotes stripped and escape sequences resolved.
    String(String),
    /// An identifier, with any quotes stripped. Lowercased if not quoted.
    Ident(String),
    /// A SQL keyword.
    Keyword(Keyword),
    Period,             // .
    Equal,              // =
    NotEqual,           // !=
    GreaterThan,        // >
    GreaterThanOrEqual, // >=
    LessThan,           // <
    LessThanOrEqual,    // <=
    LessOrGreaterThan,  // <>
    Plus,               // +
    Minus,              // -
    Asterisk,           // *
    Slash,              // /
    Caret,              // ^
    Percent,            // %
    Exclamation,        // !
    Question,           // ?
    Comma,              // ,
    Semicolon,          // ;
    OpenParen,          // (
    CloseParen,         // )
}

/// Reserved SQL keywords.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Keyword {
    And,
    As,
    Asc,
    Begin,
    Bool,
    Boolean,
    By,
    Commit,
    Create,
    Cross,
    Default,
    Delete,
    Desc,
    Double,
    Drop,
    Exists,
    Explain,
    False,
    Float,
    From,
    Group,
    Having,
    If,
    Index,
    Infinity,
    Inner,
    Insert,
    Int,
    Integer,
    Into,
    Is,
    Join,
    Key,
    Left,
    Like,
    Limit,
    NaN,
    Not,
    Null,
    Of,
    Offset,
    On,
    Only,
    Or,
    Order,
    Outer,
    Primary,
    Read,
    References,
    Right,
    Rollback,
    Select,
    Set,
    String,
    System,
    Table,
    Text,
    Time,
    Transaction,
    True,
    Unique,
    Update,
    Values,
    Varchar,
    Where,
    Write,
}
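
Since the lexer lowercases unquoted identifiers, keyword matching only ever sees lowercase strings. Here's a minimal standalone sketch of that lookup (toy code with a hypothetical `keyword_from_str` helper, not toydb's actual implementation):

```rust
// A sketch of case-insensitive keyword matching: identifiers are lowercased
// first, then compared against the reserved keyword spellings.
#[derive(Clone, Copy, Debug, PartialEq)]
enum Keyword {
    Select,
    From,
    Where,
}

/// Returns the keyword for the given (already lowercased) identifier, if any.
fn keyword_from_str(ident: &str) -> Option<Keyword> {
    match ident {
        "select" => Some(Keyword::Select),
        "from" => Some(Keyword::From),
        "where" => Some(Keyword::Where),
        _ => None,
    }
}

fn main() {
    // SELECT matches case-insensitively; "price" is a plain identifier.
    assert_eq!(keyword_from_str(&"SELECT".to_lowercase()), Some(Keyword::Select));
    assert_eq!(keyword_from_str("price"), None);
}
```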

The lexer takes an input string and emits tokens as an iterator:

/// The lexer (lexical analyzer) preprocesses raw SQL strings into a sequence of
/// lexical tokens (e.g. keyword, number, string, etc), which are passed on to
/// the SQL parser. In doing so, it strips away basic syntactic noise such as
/// whitespace, case, and quotes, and performs initial symbol validation.
pub struct Lexer<'a> {
    chars: Peekable<Chars<'a>>,
}

/// The lexer is used as a token iterator.
impl Iterator for Lexer<'_> {
    type Item = Result<Token>;

    fn next(&mut self) -> Option<Result<Token>> {
        match self.scan() {
            Ok(Some(token)) => Some(Ok(token)),
            // If there's any remaining chars, the lexer didn't recognize them.
            Ok(None) => self.chars.peek().map(|c| errinput!("unexpected character {c}")),
            Err(err) => Some(Err(err)),
        }
    }
}

impl<'a> Lexer<'a> {
    /// Creates a new lexer for the given string.
    pub fn new(input: &'a str) -> Lexer<'a> {
        Lexer { chars: input.chars().peekable() }
    }

It does this by repeatedly attempting to scan the next token until it reaches the end of the string (or errors). It can determine the kind of token by looking at the first character:

/// Scans the next token, if any.
fn scan(&mut self) -> Result<Option<Token>> {
    // Ignore whitespace.
    self.skip_whitespace();
    let Some(c) = self.chars.peek() else {
        return Ok(None);
    };
    // The first character tells us the token kind. Scan it accordingly.
    match c {
        '\'' => self.scan_string(),
        '"' => self.scan_ident_quoted(),
        '0'..='9' => Ok(self.scan_number()),
        c if c.is_alphabetic() => Ok(self.scan_ident_or_keyword()),
        _ => Ok(self.scan_symbol()),
    }
}

It then scans across the following characters as appropriate to build a complete token. For example, this is how a quoted string (e.g. 'foo') is lexed into a Token::String (including handling of any escaped quotes inside the string):

/// Scans the next quoted string literal, if any.
fn scan_string(&mut self) -> Result<Option<Token>> {
    if !self.next_is('\'') {
        return Ok(None);
    }
    let mut string = String::new();
    loop {
        match self.chars.next() {
            // '' is the escape sequence for '.
            Some('\'') if self.next_is('\'') => string.push('\''),
            Some('\'') => break,
            Some(c) => string.push(c),
            None => return errinput!("unexpected end of string literal"),
        }
    }
    Ok(Some(Token::String(string)))
}

These tokens become the input to the parser.

Abstract Syntax Tree

The end result of the parsing process will be an abstract syntax tree (AST), which is a structured representation of a SQL statement, located in the sql::parser::ast module.

The root of this tree is the sql::parser::ast::Statement enum, which represents all the different kinds of SQL statements that we support, along with their contents:

/// SQL statements are represented as an Abstract Syntax Tree (AST). The
/// statement is the root node of this tree, and describes the syntactic
/// structure of a SQL statement. It is built from a raw SQL string by the
/// parser, and passed on to the planner which validates it and builds an
/// execution plan from it.
#[derive(Debug)]
pub enum Statement {
    /// BEGIN: begins a new transaction.
    Begin {
        /// READ ONLY: if true, begin a read-only transaction.
        read_only: bool,
        /// AS OF: if given, the MVCC version to read at.
        as_of: Option<u64>,
    },
    /// COMMIT: commits a transaction.
    Commit,
    /// ROLLBACK: rolls back a transaction.
    Rollback,
    /// EXPLAIN: explains a SQL statement's execution plan.
    Explain(Box<Statement>),
    /// CREATE TABLE: creates a new table.
    CreateTable {
        /// The table name.
        name: String,
        /// Column specifications.
        columns: Vec<Column>,
    },
    /// DROP TABLE: drops a table.
    DropTable {
        /// The table to drop.
        name: String,
        /// IF EXISTS: if true, don't error if the table doesn't exist.
        if_exists: bool,
    },
    /// DELETE: deletes rows from a table.
    Delete {
        /// The table to delete from.
        table: String,
        /// WHERE: optional condition to match rows to delete.
        r#where: Option<Expression>,
    },
    /// INSERT INTO: inserts new rows into a table.
    Insert {
        /// Table to insert into.
        table: String,
        /// Columns to insert values into. If None, all columns are used.
        columns: Option<Vec<String>>,
        /// Row values to insert.
        values: Vec<Vec<Expression>>,
    },
    /// UPDATE: updates rows in a table.
    Update {
        table: String,
        set: BTreeMap<String, Option<Expression>>, // column → value, None for default value
        r#where: Option<Expression>,
    },
    /// SELECT: selects rows, possibly from a table.
    Select {
        /// Expressions to select, with an optional column alias.
        select: Vec<(Expression, Option<String>)>,
        /// FROM: tables to select from.
        from: Vec<From>,
        /// WHERE: optional condition to filter rows.
        r#where: Option<Expression>,
        /// GROUP BY: expressions to group and aggregate by.
        group_by: Vec<Expression>,
        /// HAVING: expression to filter groups by.
        having: Option<Expression>,
        /// ORDER BY: expressions to sort by, with direction.
        order_by: Vec<(Expression, Direction)>,
        /// OFFSET: row offset to start from.
        offset: Option<Expression>,
        /// LIMIT: maximum number of rows to return.
        limit: Option<Expression>,
    },
}
/// A FROM item.
#[derive(Debug)]
pub enum From {
    /// A table.
    Table {
        /// The table name.
        name: String,
        /// An optional alias for the table.
        alias: Option<String>,
    },
    /// A join of two or more tables (may be nested).
    Join {
        /// The left table to join.
        left: Box<From>,
        /// The right table to join.
        right: Box<From>,
        /// The join type.
        r#type: JoinType,
        /// The join condition. None for a cross join.
        predicate: Option<Expression>,
    },
}

/// A CREATE TABLE column definition.
#[derive(Debug)]
pub struct Column {
    pub name: String,
    pub datatype: DataType,
    pub primary_key: bool,
    pub nullable: Option<bool>,
    pub default: Option<Expression>,
    pub unique: bool,
    pub index: bool,
    pub references: Option<String>,
}

/// JOIN types.
#[derive(Debug, PartialEq)]
pub enum JoinType {
    Cross,
    Inner,
    Left,
    Right,
}

impl JoinType {
    // If true, the join is an outer join, where rows with no join matches are
    // emitted with a NULL match.
    pub fn is_outer(&self) -> bool {
        match self {
            Self::Left | Self::Right => true,
            Self::Cross | Self::Inner => false,
        }
    }
}

/// ORDER BY direction.
#[derive(Debug, Default)]
pub enum Direction {
    #[default]
    Ascending,
    Descending,
}

The nested tree structure is particularly apparent with expressions, which represent values and operations on them. Consider for example the expression 2 * 3 - 4 / 2, which evaluates to the value 4.

We've seen in the data model section how such expressions are represented as sql::types::Expression, but before we get there we have to parse them. The parser has its own representation sql::parser::ast::Expression -- this is necessary e.g. because in the AST, we represent columns as names rather than numeric indexes (we don't know yet which columns exist or what their names are, we'll get to that during planning).

From toydb/src/sql/parser/ast.rs (lines 147 to 170 at 39c6b60):

/// SQL expressions, e.g. `a + 7 > b`. Can be nested.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum Expression {
    /// All columns, i.e. *.
    All,
    /// A column reference, optionally qualified with a table name.
    Column(Option<String>, String),
    /// A literal value.
    Literal(Literal),
    /// A function call (name and parameters).
    Function(String, Vec<Expression>),
    /// An operator.
    Operator(Operator),
}

/// Expression literal values.
#[derive(Clone, Debug)]
pub enum Literal {
    Null,
    Boolean(bool),
    Integer(i64),
    Float(f64),
    String(String),
}

From toydb/src/sql/parser/ast.rs (lines 204 to 234 at 39c6b60):

/// Expression operators.
///
/// Since this is a recursive data structure, we have to box each child
/// expression, which incurs a heap allocation. There are clever ways to get
/// around this, but we keep it simple.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub enum Operator {
    And(Box<Expression>, Box<Expression>),                // a AND b
    Not(Box<Expression>),                                 // NOT a
    Or(Box<Expression>, Box<Expression>),                 // a OR b
    Equal(Box<Expression>, Box<Expression>),              // a = b
    GreaterThan(Box<Expression>, Box<Expression>),        // a > b
    GreaterThanOrEqual(Box<Expression>, Box<Expression>), // a >= b
    Is(Box<Expression>, Literal),                         // IS NULL or IS NAN
    LessThan(Box<Expression>, Box<Expression>),           // a < b
    LessThanOrEqual(Box<Expression>, Box<Expression>),    // a <= b
    NotEqual(Box<Expression>, Box<Expression>),           // a != b
    Add(Box<Expression>, Box<Expression>),                // a + b
    Divide(Box<Expression>, Box<Expression>),             // a / b
    Exponentiate(Box<Expression>, Box<Expression>),       // a ^ b
    Factorial(Box<Expression>),                           // a!
    Identity(Box<Expression>),                            // +a
    Multiply(Box<Expression>, Box<Expression>),           // a * b
    Negate(Box<Expression>),                              // -a
    Remainder(Box<Expression>, Box<Expression>),          // a % b
    Subtract(Box<Expression>, Box<Expression>),           // a - b
    Like(Box<Expression>, Box<Expression>),               // a LIKE b
}

For example, 2 * 3 - 4 / 2 is represented as:

Expression::Operator(Operator::Subtract(
    // The left-hand operand of -
    Expression::Operator(Operator::Multiply(
        // The left-hand operand of *
        Expression::Literal(Literal::Integer(2)),
        // The right-hand operand of *
        Expression::Literal(Literal::Integer(3)),
    )),
    // The right-hand operand of -
    Expression::Operator(Operator::Divide(
        // The left-hand operand of /
        Expression::Literal(Literal::Integer(4)),
        // The right-hand operand of /
        Expression::Literal(Literal::Integer(2)),
    )),
))
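
A tree like this is typically consumed by a recursive fold. Here's a hedged sketch of that idea with toy types (toydb's actual evaluation happens later, on sql::types::Expression, after planning):

```rust
// A toy expression tree mirroring the shape of the AST above, and a recursive
// evaluator that folds it into a value.
#[derive(Debug)]
enum Expr {
    Integer(i64),
    Multiply(Box<Expr>, Box<Expr>),
    Divide(Box<Expr>, Box<Expr>),
    Subtract(Box<Expr>, Box<Expr>),
}

fn eval(expr: &Expr) -> i64 {
    match expr {
        Expr::Integer(n) => *n,
        Expr::Multiply(lhs, rhs) => eval(lhs) * eval(rhs),
        Expr::Divide(lhs, rhs) => eval(lhs) / eval(rhs),
        Expr::Subtract(lhs, rhs) => eval(lhs) - eval(rhs),
    }
}

fn main() {
    // 2 * 3 - 4 / 2, grouped as (2 * 3) - (4 / 2).
    let expr = Expr::Subtract(
        Box::new(Expr::Multiply(
            Box::new(Expr::Integer(2)),
            Box::new(Expr::Integer(3)),
        )),
        Box::new(Expr::Divide(
            Box::new(Expr::Integer(4)),
            Box::new(Expr::Integer(2)),
        )),
    );
    assert_eq!(eval(&expr), 4);
}
```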

Parser

The parser, sql::parser::Parser, takes lexer tokens as input and builds an ast::Statement from them:

/// The SQL parser takes tokens from the lexer and parses the SQL syntax into an
/// Abstract Syntax Tree (AST).
///
/// The AST represents the syntactic structure of a SQL query (e.g. the SELECT
/// and FROM clauses, values, arithmetic expressions, etc.). However, it only
/// ensures the syntax is well-formed, and does not know whether e.g. a given
/// table or column exists or which kind of join to use -- that is the job of
/// the planner.
pub struct Parser<'a> {
    pub lexer: Peekable<Lexer<'a>>,
}

impl Parser<'_> {
    /// Parses the input string into a SQL statement AST. The entire string must
    /// be parsed as a single statement, ending with an optional semicolon.
    pub fn parse(statement: &str) -> Result<ast::Statement> {
        let mut parser = Self::new(statement);
        let statement = parser.parse_statement()?;
        parser.skip(Token::Semicolon);
        if let Some(token) = parser.lexer.next().transpose()? {
            return errinput!("unexpected token {token}");
        }
        Ok(statement)
    }

We can determine the kind of statement we're parsing simply by looking at the first keyword:

/// Parses a SQL statement.
fn parse_statement(&mut self) -> Result<ast::Statement> {
    let Some(token) = self.peek()? else {
        return errinput!("unexpected end of input");
    };
    match token {
        Token::Keyword(Keyword::Begin) => self.parse_begin(),
        Token::Keyword(Keyword::Commit) => self.parse_commit(),
        Token::Keyword(Keyword::Rollback) => self.parse_rollback(),
        Token::Keyword(Keyword::Explain) => self.parse_explain(),
        Token::Keyword(Keyword::Create) => self.parse_create_table(),
        Token::Keyword(Keyword::Drop) => self.parse_drop_table(),
        Token::Keyword(Keyword::Delete) => self.parse_delete(),
        Token::Keyword(Keyword::Insert) => self.parse_insert(),
        Token::Keyword(Keyword::Select) => self.parse_select(),
        Token::Keyword(Keyword::Update) => self.parse_update(),
        token => errinput!("unexpected token {token}"),
    }
}

Let's see how a SELECT statement is parsed. The different clauses in a SELECT (e.g. FROM, WHERE, etc.) must always be given in a specific order, and they always begin with the appropriate keyword, so we can simply try to parse each clause in the expected order:

/// Parses a SELECT statement.
fn parse_select(&mut self) -> Result<ast::Statement> {
    Ok(ast::Statement::Select {
        select: self.parse_select_clause()?,
        from: self.parse_from_clause()?,
        r#where: self.parse_where_clause()?,
        group_by: self.parse_group_by_clause()?,
        having: self.parse_having_clause()?,
        order_by: self.parse_order_by_clause()?,
        limit: self.parse_limit_clause()?,
        offset: self.parse_offset_clause()?,
    })
}

Parsing each clause is also just a matter of parsing the expected parts in order. For example, the initial SELECT clause is just a comma-separated list of expressions with an optional alias:

/// Parses a SELECT clause, if present.
fn parse_select_clause(&mut self) -> Result<Vec<(ast::Expression, Option<String>)>> {
    if !self.next_is(Keyword::Select.into()) {
        return Ok(Vec::new());
    }
    let mut select = Vec::new();
    loop {
        let expr = self.parse_expression()?;
        let mut alias = None;
        if self.next_is(Keyword::As.into()) || matches!(self.peek()?, Some(Token::Ident(_))) {
            if expr == ast::Expression::All {
                return errinput!("can't alias *");
            }
            alias = Some(self.next_ident()?);
        }
        select.push((expr, alias));
        if !self.next_is(Token::Comma) {
            break;
        }
    }
    Ok(select)
}

The FROM clause is a comma-separated list of table names, each optionally joined with other tables:

/// Parses a FROM clause, if present.
fn parse_from_clause(&mut self) -> Result<Vec<ast::From>> {
    if !self.next_is(Keyword::From.into()) {
        return Ok(Vec::new());
    }
    let mut from = Vec::new();
    loop {
        let mut from_item = self.parse_from_table()?;
        while let Some(r#type) = self.parse_from_join()? {
            let left = Box::new(from_item);
            let right = Box::new(self.parse_from_table()?);
            let mut predicate = None;
            if r#type != ast::JoinType::Cross {
                self.expect(Keyword::On.into())?;
                predicate = Some(self.parse_expression()?)
            }
            from_item = ast::From::Join { left, right, r#type, predicate };
        }
        from.push(from_item);
        if !self.next_is(Token::Comma) {
            break;
        }
    }
    Ok(from)
}

// Parses a FROM table.
fn parse_from_table(&mut self) -> Result<ast::From> {
    let name = self.next_ident()?;
    let mut alias = None;
    if self.next_is(Keyword::As.into()) || matches!(self.peek()?, Some(Token::Ident(_))) {
        alias = Some(self.next_ident()?)
    };
    Ok(ast::From::Table { name, alias })
}

// Parses a FROM JOIN type, if present.
fn parse_from_join(&mut self) -> Result<Option<ast::JoinType>> {
    if self.next_is(Keyword::Join.into()) {
        return Ok(Some(ast::JoinType::Inner));
    }
    if self.next_is(Keyword::Cross.into()) {
        self.expect(Keyword::Join.into())?;
        return Ok(Some(ast::JoinType::Cross));
    }
    if self.next_is(Keyword::Inner.into()) {
        self.expect(Keyword::Join.into())?;
        return Ok(Some(ast::JoinType::Inner));
    }
    if self.next_is(Keyword::Left.into()) {
        self.skip(Keyword::Outer.into());
        self.expect(Keyword::Join.into())?;
        return Ok(Some(ast::JoinType::Left));
    }
    if self.next_is(Keyword::Right.into()) {
        self.skip(Keyword::Outer.into());
        self.expect(Keyword::Join.into())?;
        return Ok(Some(ast::JoinType::Right));
    }
    Ok(None)
}
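
Note how the inner while loop in parse_from_clause folds joins left-associatively: each additional JOIN wraps the FROM item built so far as its left side, so `a JOIN b JOIN c` becomes `Join(Join(a, b), c)`. A minimal standalone sketch of that folding (toy types and a hypothetical `fold_joins` helper, not toydb's code):

```rust
// A sketch of left-associative join folding: each new table wraps the
// previously built FROM item as the left side of a new Join node.
#[derive(Debug, PartialEq)]
enum From {
    Table(String),
    Join { left: Box<From>, right: Box<From> },
}

fn fold_joins(tables: Vec<&str>) -> From {
    let mut iter = tables.into_iter();
    let mut item = From::Table(iter.next().expect("at least one table").into());
    for table in iter {
        item = From::Join {
            left: Box::new(item),
            right: Box::new(From::Table(table.into())),
        };
    }
    item
}

fn main() {
    // a JOIN b JOIN c folds into Join(Join(a, b), c).
    assert_eq!(
        fold_joins(vec!["a", "b", "c"]),
        From::Join {
            left: Box::new(From::Join {
                left: Box::new(From::Table("a".into())),
                right: Box::new(From::Table("b".into())),
            }),
            right: Box::new(From::Table("c".into())),
        }
    );
}
```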

And the WHERE clause is just a predicate expression to filter by:

/// Parses a WHERE clause, if present.
fn parse_where_clause(&mut self) -> Result<Option<ast::Expression>> {
    if !self.next_is(Keyword::Where.into()) {
        return Ok(None);
    }
    Ok(Some(self.parse_expression()?))
}

Expression parsing is where this gets tricky, because we have to respect the rules of operator precedence and associativity. For example, according to mathematical order of operations (aka "PEMDAS") the expression 2 * 3 - 4 / 2 must be parsed as (2 * 3) - (4 / 2) which yields 4, not 2 * (3 - 4) / 2 which yields -1.

toyDB does this using the precedence climbing algorithm, which is a fairly simple and compact algorithm as far as these things go. In a nutshell, it will greedily and recursively group operators together as long as their precedence is the same or higher than that of the operators preceding them (hence "precedence climbing"). For example:

-----   ----- Precedence 2: * and /
------------- Precedence 1: -
2 * 3 - 4 / 2

The algorithm is documented in more detail on Parser::parse_expression():

/// Parses an expression using the precedence climbing algorithm. See:
///
/// <https://en.wikipedia.org/wiki/Operator-precedence_parser#Precedence_climbing_method>
/// <https://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing>
///
/// Expressions are made up of two main entities:
///
/// * Atoms: values, variables, functions, and parenthesized expressions.
/// * Operators: performs operations on atoms and sub-expressions.
///   * Prefix operators: e.g. `-a` or `NOT a`.
///   * Infix operators: e.g. `a + b` or `a AND b`.
///   * Postfix operators: e.g. `a!` or `a IS NULL`.
///
/// During parsing, we have to respect the mathematical precedence and
/// associativity of operators. Consider e.g.:
///
/// 2 ^ 3 ^ 2 - 4 * 3
///
/// By the rules of precedence and associativity, this expression should
/// be interpreted as:
///
/// (2 ^ (3 ^ 2)) - (4 * 3)
///
/// Specifically, the exponentiation operator ^ is right-associative, so it
/// should be 2 ^ (3 ^ 2) = 512, not (2 ^ 3) ^ 2 = 64. Similarly,
/// exponentiation and multiplication have higher precedence than
/// subtraction, so it should be (2 ^ 3 ^ 2) - (4 * 3) = 500, not
/// 2 ^ 3 ^ (2 - 4) * 3 = -3.24.
///
/// To use precedence climbing, we first need to specify the relative
/// precedence of operators as a number, where 1 is the lowest precedence:
///
/// * 1: OR
/// * 2: AND
/// * 3: NOT
/// * 4: =, !=, LIKE, IS
/// * 5: <, <=, >, >=
/// * 6: +, -
/// * 7: *, /, %
/// * 8: ^
/// * 9: !
/// * 10: +, - (prefix)
///
/// We also have to specify the associativity of operators:
///
/// * Right-associative: ^ and all prefix operators.
/// * Left-associative: all other operators.
///
/// Left-associative operators get a +1 to their precedence, so that they
/// bind tighter to their left operand than right-associative operators.
///
/// The precedence climbing algorithm works by recursively parsing the
/// left-hand side of an expression (including any prefix operators), any
/// infix operators and recursive right-hand side expressions, and finally
/// any postfix operators.
///
/// The grouping is determined by where the right-hand side recursion
/// terminates. The algorithm will greedily consume as many operators as
/// possible, but only as long as their precedence is greater than or equal
/// to the precedence of the previous operator (hence the name "climbing").
/// When we find an operator with lower precedence, we return the current
/// expression up the recursion stack and resume parsing the operator at a
/// lower precedence.
///
/// The precedence levels for the previous example are as follows:
///
///     -----         Precedence 9: ^ right-associativity
/// ---------         Precedence 9: ^
///             ----- Precedence 7: *
/// ----------------- Precedence 6: -
/// 2 ^ 3 ^ 2 - 4 * 3
///
/// Let's walk through the recursive parsing of this expression:
///
/// parse_expression_at(prec=0)
///     lhs = parse_expression_atom() = 2
///     op = parse_infix_operator(prec=0) = ^ (prec=9)
///     rhs = parse_expression_at(prec=9)
///         lhs = parse_expression_atom() = 3
///         op = parse_infix_operator(prec=9) = ^ (prec=9)
///         rhs = parse_expression_at(prec=9)
///             lhs = parse_expression_atom() = 2
///             op = parse_infix_operator(prec=9) = None (reject - at prec=6)
///             return lhs = 2
///         lhs = (lhs op rhs) = (3 ^ 2)
///         op = parse_infix_operator(prec=9) = None (reject - at prec=6)
///         return lhs = (3 ^ 2)
///     lhs = (lhs op rhs) = (2 ^ (3 ^ 2))
///     op = parse_infix_operator(prec=0) = - (prec=6)
///     rhs = parse_expression_at(prec=6)
///         lhs = parse_expression_atom() = 4
///         op = parse_infix_operator(prec=6) = * (prec=7)
///         rhs = parse_expression_at(prec=7)
///             lhs = parse_expression_atom() = 3
///             op = parse_infix_operator(prec=7) = None (end of expression)
///             return lhs = 3
///         lhs = (lhs op rhs) = (4 * 3)
///         op = parse_infix_operator(prec=6) = None (end of expression)
///         return lhs = (4 * 3)
///     lhs = (lhs op rhs) = ((2 ^ (3 ^ 2)) - (4 * 3))
///     op = parse_infix_operator(prec=0) = None (end of expression)
///     return lhs = ((2 ^ (3 ^ 2)) - (4 * 3))
fn parse_expression(&mut self) -> Result<ast::Expression> {
    self.parse_expression_at(0)
}

/// Parses an expression at the given minimum precedence.
fn parse_expression_at(&mut self, min_precedence: Precedence) -> Result<ast::Expression> {
    // If the left-hand side is a prefix operator, recursively parse it and
    // its operand. Otherwise, parse the left-hand side as an atom.
    let mut lhs = if let Some(prefix) = self.parse_prefix_operator_at(min_precedence) {
        let next_precedence = prefix.precedence() + prefix.associativity();
        let rhs = self.parse_expression_at(next_precedence)?;
        prefix.into_expression(rhs)
    } else {
        self.parse_expression_atom()?
    };
    // Apply any postfix operators to the left-hand side.
    while let Some(postfix) = self.parse_postfix_operator_at(min_precedence)? {
        lhs = postfix.into_expression(lhs)
    }
    // Repeatedly apply any infix operators to the left-hand side as long as
    // their precedence is greater than or equal to the current minimum
    // precedence (i.e. that of the upstack operator).
    //
    // The right-hand side expression parsing will recursively apply any
    // infix operators at or above this operator's precedence to the
    // right-hand side.
    while let Some(infix) = self.parse_infix_operator_at(min_precedence) {
        let next_precedence = infix.precedence() + infix.associativity();
        let rhs = self.parse_expression_at(next_precedence)?;
        lhs = infix.into_expression(lhs, rhs);
    }
    // Apply any postfix operators after the binary operator. Consider e.g.
    // 1 + NULL IS NULL.
    while let Some(postfix) = self.parse_postfix_operator_at(min_precedence)? {
        lhs = postfix.into_expression(lhs)
    }
    Ok(lhs)
}

/// Parses an expression atom. This is either:
///
/// * A literal value.
/// * A column name.
/// * A function call.
/// * A parenthesized expression.
fn parse_expression_atom(&mut self) -> Result<ast::Expression> {
    Ok(match self.next()? {
        // All columns.
        Token::Asterisk => ast::Expression::All,
        // Literal value.
        Token::Number(n) if n.chars().all(|c| c.is_ascii_digit()) => {
            ast::Literal::Integer(n.parse()?).into()
        }
        Token::Number(n) => ast::Literal::Float(n.parse()?).into(),
        Token::String(s) => ast::Literal::String(s).into(),
        Token::Keyword(Keyword::True) => ast::Literal::Boolean(true).into(),
        Token::Keyword(Keyword::False) => ast::Literal::Boolean(false).into(),
        Token::Keyword(Keyword::Infinity) => ast::Literal::Float(f64::INFINITY).into(),
        Token::Keyword(Keyword::NaN) => ast::Literal::Float(f64::NAN).into(),
        Token::Keyword(Keyword::Null) => ast::Literal::Null.into(),
        // Function call.
        Token::Ident(name) if self.next_is(Token::OpenParen) => {
            let mut args = Vec::new();
            while !self.next_is(Token::CloseParen) {
                if !args.is_empty() {
                    self.expect(Token::Comma)?;
                }
                args.push(self.parse_expression()?);
            }
            ast::Expression::Function(name, args)
        }
        // Column name, either qualified as table.column or unqualified.
        Token::Ident(table) if self.next_is(Token::Period) => {
            ast::Expression::Column(Some(table), self.next_ident()?)
        }
        Token::Ident(column) => ast::Expression::Column(None, column),
        // Parenthesized expression.
        Token::OpenParen => {
            let expr = self.parse_expression()?;
            self.expect(Token::CloseParen)?;
            expr
        }
        token => return errinput!("expected expression atom, found {token}"),
    })
}
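
To see the climbing in action outside of toydb, here's a self-contained sketch of precedence climbing over just the infix operators + - * / ^ and single-digit integers, producing a fully parenthesized string so the grouping is visible. This is simplified toy code (no prefix or postfix operators, no error handling), not toydb's parser:

```rust
use std::iter::Peekable;

/// Returns the operator's precedence, or None for a non-operator.
fn precedence(op: char) -> Option<u8> {
    match op {
        '+' | '-' => Some(1),
        '*' | '/' => Some(2),
        '^' => Some(3),
        _ => None,
    }
}

/// Left-associative operators recurse at prec + 1 so they bind tighter to
/// their left operand; right-associative ^ recurses at the same precedence.
fn associativity(op: char) -> u8 {
    if op == '^' { 0 } else { 1 }
}

/// Parses an expression at the given minimum precedence, returning it as a
/// fully parenthesized string.
fn parse_at(chars: &mut Peekable<impl Iterator<Item = char>>, min_prec: u8) -> String {
    let mut lhs = chars.next().expect("expected digit").to_string();
    while let Some(&op) = chars.peek() {
        let Some(prec) = precedence(op) else { break };
        // Stop if this operator binds less tightly than the caller's operator.
        if prec < min_prec {
            break;
        }
        chars.next(); // consume the operator
        let rhs = parse_at(chars, prec + associativity(op));
        lhs = format!("({lhs}{op}{rhs})");
    }
    lhs
}

fn parse(input: &str) -> String {
    let mut chars = input.chars().filter(|c| !c.is_whitespace()).peekable();
    parse_at(&mut chars, 0)
}

fn main() {
    // ^ groups right-associatively, and * binds tighter than -.
    assert_eq!(parse("2 ^ 3 ^ 2 - 4 * 3"), "((2^(3^2))-(4*3))");
    // The PEMDAS example from earlier: (2 * 3) - (4 / 2).
    assert_eq!(parse("2 * 3 - 4 / 2"), "((2*3)-(4/2))");
}
```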


SQL Raft Replication   |   SQL Planning