Skip to content

Latest commit

 

History

History
130 lines (88 loc) · 6.08 KB

File metadata and controls

130 lines (88 loc) · 6.08 KB

SQL Data Model

The SQL data model represents user data in tables and rows. It is made up of data types and schemas, in the sql::types module.

Data Types

toyDB supports four basic scalar data types as sql::types::DataType: booleans, integers, floats, and strings.

/// A primitive SQL data type. For simplicity, only a handful of scalar types
/// are supported (no compound types).
#[derive(Clone, Copy, Debug, Hash, PartialEq, Serialize, Deserialize)]
pub enum DataType {
/// A boolean: true or false.
Boolean,
/// A 64-bit signed integer.
Integer,
/// A 64-bit floating point number.
Float,
/// A UTF-8 encoded string.
String,
}

Specific values are represented as sql::types::Value, using the corresponding Rust types. toyDB also supports SQL NULL values, i.e. unknown values, following the rules of three-valued logic.

/// A primitive SQL value, represented as a native Rust type.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub enum Value {
/// An unknown value of unknown type (i.e. SQL NULL).
///
/// In code, Null is considered equal to Null, so that we can detect, index,
/// and order these values. The SQL NULL semantics are implemented during
/// Expression evaluation.
Null,
/// A boolean.
Boolean(bool),
/// A 64-bit signed integer.
Integer(i64),
/// A 64-bit floating point number.
///
/// In code, NaN is considered equal to NaN, so that we can detect, index,
/// and order these values. The SQL NAN semantics are implemented during
/// Expression evaluation.
///
/// -0.0 and -NaN are considered equal to their positive counterpart, and
/// normalized as positive when serialized (for key lookups).
Float(#[serde(serialize_with = "serialize_f64")] f64),
/// A UTF-8 encoded string.
String(String),
}

The Value type provides basic formatting, conversion, and mathematical operations.

impl Display for Value {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
match self {
Self::Null => f.write_str("NULL"),
Self::Boolean(true) => f.write_str("TRUE"),
Self::Boolean(false) => f.write_str("FALSE"),
Self::Integer(integer) => integer.fmt(f),
Self::Float(float) => write!(f, "{float:?}"),
Self::String(string) => write!(f, "'{}'", string.escape_debug()),
}
}
}

impl Value {
/// Returns the value's datatype, or None for null values.
pub fn datatype(&self) -> Option<DataType> {
match self {
Self::Null => None,
Self::Boolean(_) => Some(DataType::Boolean),
Self::Integer(_) => Some(DataType::Integer),
Self::Float(_) => Some(DataType::Float),
Self::String(_) => Some(DataType::String),
}
}
/// Returns true if the value is undefined (NULL or NaN).
pub fn is_undefined(&self) -> bool {
match self {
Self::Null => true,
Self::Float(f) if f.is_nan() => true,
_ => false,
}
}
/// Adds two values. Errors if invalid.
pub fn checked_add(&self, other: &Self) -> Result<Self> {
use Value::*;
Ok(match (self, other) {
(Integer(lhs), Integer(rhs)) => match lhs.checked_add(*rhs) {
Some(i) => Integer(i),
None => return errinput!("integer overflow"),
},
(Integer(lhs), Float(rhs)) => Float(*lhs as f64 + rhs),
(Float(lhs), Integer(rhs)) => Float(lhs + *rhs as f64),
(Float(lhs), Float(rhs)) => Float(lhs + rhs),
(Null, Integer(_) | Float(_) | Null) => Null,
(Integer(_) | Float(_), Null) => Null,
(lhs, rhs) => return errinput!("can't add {lhs} and {rhs}"),
})
}
/// Divides two values. Errors if invalid.
pub fn checked_div(&self, other: &Self) -> Result<Self> {
use Value::*;
Ok(match (self, other) {
(Integer(_), Integer(0)) => return errinput!("can't divide by zero"),
(Integer(lhs), Integer(rhs)) => Integer(lhs / rhs),
(Integer(lhs), Float(rhs)) => Float(*lhs as f64 / rhs),
(Float(lhs), Integer(rhs)) => Float(lhs / *rhs as f64),
(Float(lhs), Float(rhs)) => Float(lhs / rhs),
(Null, Integer(_) | Float(_) | Null) => Null,
(Integer(_) | Float(_), Null) => Null,
(lhs, rhs) => return errinput!("can't divide {lhs} and {rhs}"),
})
}
/// Multiplies two values. Errors if invalid.
pub fn checked_mul(&self, other: &Self) -> Result<Self> {
use Value::*;
Ok(match (self, other) {
(Integer(lhs), Integer(rhs)) => match lhs.checked_mul(*rhs) {
Some(i) => Integer(i),
None => return errinput!("integer overflow"),
},
(Integer(lhs), Float(rhs)) => Float(*lhs as f64 * rhs),
(Float(lhs), Integer(rhs)) => Float(lhs * *rhs as f64),
(Float(lhs), Float(rhs)) => Float(lhs * rhs),
(Null, Integer(_) | Float(_) | Null) => Null,
(Integer(_) | Float(_), Null) => Null,
(lhs, rhs) => return errinput!("can't multiply {lhs} and {rhs}"),
})
}
/// Exponentiates two values. Errors if invalid.
pub fn checked_pow(&self, other: &Self) -> Result<Self> {
use Value::*;
Ok(match (self, other) {
(Integer(lhs), Integer(rhs)) if *rhs >= 0 => {
let rhs = (*rhs).try_into().or_else(|_| errinput!("integer overflow"))?;
match lhs.checked_pow(rhs) {
Some(i) => Integer(i),
None => return errinput!("integer overflow"),
}
}
(Integer(lhs), Integer(rhs)) => Float((*lhs as f64).powf(*rhs as f64)),
(Integer(lhs), Float(rhs)) => Float((*lhs as f64).powf(*rhs)),
(Float(lhs), Integer(rhs)) => Float((lhs).powi(*rhs as i32)),
(Float(lhs), Float(rhs)) => Float((lhs).powf(*rhs)),
(Integer(_) | Float(_), Null) => Null,
(Null, Integer(_) | Float(_) | Null) => Null,
(lhs, rhs) => return errinput!("can't exponentiate {lhs} and {rhs}"),
})
}
/// Finds the remainder of two values. Errors if invalid.
///
/// NB: uses the remainder, not modulo, like Postgres. This means that for
/// negative values, the result has the sign of the dividend, rather than
/// always returning a positive value.
pub fn checked_rem(&self, other: &Self) -> Result<Self> {
use Value::*;
Ok(match (self, other) {
(Integer(_), Integer(0)) => return errinput!("can't divide by zero"),
(Integer(lhs), Integer(rhs)) => Integer(lhs % rhs),
(Integer(lhs), Float(rhs)) => Float(*lhs as f64 % rhs),
(Float(lhs), Integer(rhs)) => Float(lhs % *rhs as f64),
(Float(lhs), Float(rhs)) => Float(lhs % rhs),
(Integer(_) | Float(_) | Null, Null) => Null,
(Null, Integer(_) | Float(_)) => Null,
(lhs, rhs) => return errinput!("can't take remainder of {lhs} and {rhs}"),
})
}
/// Subtracts two values. Errors if invalid.
pub fn checked_sub(&self, other: &Self) -> Result<Self> {
use Value::*;
Ok(match (self, other) {
(Integer(lhs), Integer(rhs)) => match lhs.checked_sub(*rhs) {
Some(i) => Integer(i),
None => return errinput!("integer overflow"),
},
(Integer(lhs), Float(rhs)) => Float(*lhs as f64 - rhs),
(Float(lhs), Integer(rhs)) => Float(lhs - *rhs as f64),
(Float(lhs), Float(rhs)) => Float(lhs - rhs),
(Null, Integer(_) | Float(_) | Null) => Null,
(Integer(_) | Float(_), Null) => Null,
(lhs, rhs) => return errinput!("can't subtract {lhs} and {rhs}"),
})
}
}
impl From<bool> for Value {
fn from(v: bool) -> Self {
Value::Boolean(v)
}
}
impl From<f64> for Value {
fn from(v: f64) -> Self {
Value::Float(v)
}
}
impl From<i64> for Value {
fn from(v: i64) -> Self {
Value::Integer(v)
}
}
impl From<String> for Value {
fn from(v: String) -> Self {
Value::String(v)
}
}
impl From<&str> for Value {
fn from(v: &str) -> Self {
Value::String(v.to_owned())
}
}
impl TryFrom<Value> for bool {
type Error = Error;
fn try_from(value: Value) -> Result<Self> {
let Value::Boolean(b) = value else {
return errdata!("not a boolean: {value}");
};
Ok(b)
}
}
impl TryFrom<Value> for f64 {
type Error = Error;
fn try_from(value: Value) -> Result<Self> {
let Value::Float(f) = value else {
return errdata!("not a float: {value}");
};
Ok(f)
}
}
impl TryFrom<Value> for i64 {
type Error = Error;
fn try_from(value: Value) -> Result<Self> {
let Value::Integer(i) = value else {
return errdata!("not an integer: {value}");
};
Ok(i)
}
}
impl TryFrom<Value> for String {
type Error = Error;
fn try_from(value: Value) -> Result<Self> {
let Value::String(s) = value else {
return errdata!("not a string: {value}");
};
Ok(s)
}
}

It also specifies comparison and ordering semantics, but these are subtly different from the SQL semantics. For example, in Rust code Value::Null == Value::Null yields true, while in SQL NULL = NULL yields NULL. This mismatch is necessary for the Rust code to properly detect and process Null values, and the desired SQL semantics are implemented during expression evaluation which we'll cover below.

// Consider Nulls and ±NaNs equal. Rust already considers -0.0 == 0.0.
impl PartialEq for Value {
fn eq(&self, other: &Self) -> bool {
match (self, other) {
(Self::Boolean(a), Self::Boolean(b)) => a == b,
(Self::Integer(a), Self::Integer(b)) => a == b,
(Self::Integer(a), Self::Float(b)) => *a as f64 == *b,
(Self::Float(a), Self::Integer(b)) => *a == *b as f64,
(Self::Float(a), Self::Float(b)) => a == b || a.is_nan() && b.is_nan(),
(Self::String(a), Self::String(b)) => a == b,
(Self::Null, Self::Null) => true,
(_, _) => false,
}
}
}
impl Eq for Value {}
// Allow hashing Nulls and floats, and hash -0.0 and -NaN as positive.
impl Hash for Value {
fn hash<H: Hasher>(&self, hasher: &mut H) {
core::mem::discriminant(self).hash(hasher); // hash the type
match self {
Self::Null => {}
Self::Boolean(v) => v.hash(hasher),
Self::Integer(v) => v.hash(hasher),
Self::Float(v) => {
// Hash -NaN and -0.0 as positive.
let mut v = *v;
if (v.is_nan() || v == 0.0) && v.is_sign_negative() {
v = -v;
}
v.to_bits().hash(hasher)
}
Self::String(v) => v.hash(hasher),
}
}
}
// Consider Nulls and NaNs equal when ordering.
//
// We establish a total order across all types, even though mixed types will
// rarely/never come up: String > Integer/Float > Boolean > Null.
impl Ord for Value {
fn cmp(&self, other: &Self) -> Ordering {
match (self, other) {
(Self::Null, Self::Null) => Ordering::Equal,
(Self::Boolean(a), Self::Boolean(b)) => a.cmp(b),
(Self::Integer(a), Self::Integer(b)) => a.cmp(b),
(Self::Integer(a), Self::Float(b)) => (*a as f64).total_cmp(b),
(Self::Float(a), Self::Integer(b)) => a.total_cmp(&(*b as f64)),
(Self::Float(a), Self::Float(b)) => a.total_cmp(b),
(Self::String(a), Self::String(b)) => a.cmp(b),
(Self::Null, _) => Ordering::Less,
(_, Self::Null) => Ordering::Greater,
(Self::Boolean(_), _) => Ordering::Less,
(_, Self::Boolean(_)) => Ordering::Greater,
(Self::Float(_), _) => Ordering::Less,
(_, Self::Float(_)) => Ordering::Greater,
(Self::Integer(_), _) => Ordering::Less,
(_, Self::Integer(_)) => Ordering::Greater,
// String is ordered last.
}
}
}
impl PartialOrd for Value {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}

During execution, a row of values is represented as sql::types::Row, with multiple rows emitted via sql::types::Rows row iterators:

/// A row of values.
pub type Row = Vec<Value>;
/// A row iterator.
pub type Rows = Box<dyn RowIterator>;
/// A row iterator trait, which requires the iterator to be both clonable and
/// object-safe. Cloning allows resetting an iterator back to an initial state,
/// e.g. for nested loop joins. It uses a blanket implementation, and relies on
/// dyn_clone to allow cloning trait objects.
pub trait RowIterator: Iterator<Item = Result<Row>> + DynClone {}

Schemas

toyDB schemas only support tables. There are no named indexes or constraints, and there's only a single unnamed database.

Tables are represented by sql::types::Table:

/// A table schema, which specifies its data structure and constraints.
///
/// Tables can't change after they are created. There is no ALTER TABLE nor
/// CREATE/DROP INDEX, only CREATE TABLE and DROP TABLE.
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
pub struct Table {
/// The table name. Unique identifier for the table. Can't be empty.
pub name: String,
/// The primary key column index. A table must have a primary key, and it
/// can only be a single column.
pub primary_key: usize,
/// The table's columns. Must have at least one.
pub columns: Vec<Column>,
}

A table is made up of a set of columns, represented by sql::types::Column. These support the data types described above, along with unique constraints, foreign keys, and secondary indexes.

/// A table column.
#[derive(Clone, Debug, PartialEq, Deserialize, Serialize)]
pub struct Column {
/// Column name. Can't be empty.
pub name: String,
/// Column datatype.
pub datatype: DataType,
/// Whether the column allows null values. Not legal for primary keys.
pub nullable: bool,
/// The column's default value. If None, the user must specify an explicit
/// value. Must match the column datatype. Nullable columns require a
/// default (often Null). Null is only a valid default when nullable.
pub default: Option<Value>,
/// Whether the column should only allow unique values (ignoring NULLs).
/// Must be true for a primary key column. Requires index.
pub unique: bool,
/// Whether the column should have a secondary index. Must be false for
/// primary keys, which are the primary index. Must be true for unique or
/// reference columns.
pub index: bool,
/// If set, this column is a foreign key reference to the given table's
/// primary key. Must be of the same type as the target primary key.
/// Requires index.
pub references: Option<String>,
}

The table name serves as a unique identifier, and can't be changed later. In fact, tables schemas are entirely static: they can only be created or dropped (there are no schema changes).

Table schemas are stored in the catalog, represented by the sql::engine::Catalog trait. We'll revisit the implementation of this trait in the SQL storage section.

/// The catalog stores table schema information. It must be implemented for
/// Transaction, and is thus fully transactional. For simplicity, it only
/// supports creating and dropping tables -- there are no ALTER TABLE schema
/// changes, nor CREATE INDEX.
pub trait Catalog {
/// Creates a new table. Errors if it already exists.
fn create_table(&self, table: Table) -> Result<()>;
/// Drops a table. Errors if it does not exist, unless if_exists is true.
/// Returns true if the table existed and was deleted.
fn drop_table(&self, table: &str, if_exists: bool) -> Result<bool>;
/// Fetches a table schema, or None if it doesn't exist.
fn get_table(&self, table: &str) -> Result<Option<Table>>;
/// Returns a list of all table schemas.
fn list_tables(&self) -> Result<Vec<Table>>;
/// Fetches a table schema, or errors if it does not exist.
fn must_get_table(&self, table: &str) -> Result<Table> {
self.get_table(table)?.ok_or_else(|| errinput!("table {table} does not exist"))
}
}

Table schemas are validated when created via Table::validate(), which enforces invariants and internal consistency. It uses the catalog to look up information about other tables, e.g. that foreign key references point to a valid target column in a different table.

/// Validates the table schema, using the catalog to validate foreign key
/// references.
pub fn validate(&self, catalog: &impl Catalog) -> Result<()> {
if self.name.is_empty() {
return errinput!("table name can't be empty");
}
if self.columns.is_empty() {
return errinput!("table has no columns");
}
if self.columns.get(self.primary_key).is_none() {
return errinput!("invalid primary key index");
}
for (i, column) in self.columns.iter().enumerate() {
if column.name.is_empty() {
return errinput!("column name can't be empty");
}
let (cname, ctype) = (&column.name, &column.datatype); // for formatting convenience
// Validate primary key.
let is_primary_key = i == self.primary_key;
if is_primary_key {
if column.nullable {
return errinput!("primary key {cname} cannot be nullable");
}
if !column.unique {
return errinput!("primary key {cname} must be unique");
}
if column.index {
return errinput!("primary key {cname} can't have an index");
}
}
// Validate default value.
match column.default.as_ref().map(|v| v.datatype()) {
None if column.nullable => {
return errinput!("nullable column {cname} must have a default value");
}
Some(None) if !column.nullable => {
return errinput!("invalid NULL default for non-nullable column {cname}");
}
Some(Some(vtype)) if vtype != column.datatype => {
return errinput!("invalid default type {vtype} for {ctype} column {cname}");
}
Some(_) | None => {}
}
// Validate unique index.
if column.unique && !column.index && !is_primary_key {
return errinput!("unique column {cname} must have a secondary index");
}
// Validate references.
if let Some(reference) = &column.references {
if !column.index && !is_primary_key {
return errinput!("reference column {cname} must have a secondary index");
}
let reftype = if reference == &self.name {
self.columns[self.primary_key].datatype
} else if let Some(target) = catalog.get_table(reference)? {
target.columns[target.primary_key].datatype
} else {
return errinput!("unknown table {reference} referenced by column {cname}");
};
if column.datatype != reftype {
return errinput!(
"can't reference {reftype} primary key of {reference} from {ctype} column {cname}"
);
}
}
}
Ok(())
}

Table rows are validated via Table::validate_row(), which ensures that a sql::types::Row conforms to the schema (e.g. that value types match the column data types). It uses a sql::engine::Transaction to look up other rows in the database, e.g. to check for primary key conflicts (we'll get back to this later).

/// Validates a row, including uniqueness and reference checks using the
/// given transaction.
///
/// If update is true, the row replaces an existing entry with the same
/// primary key. Otherwise, it is an insert. Primary key changes are
/// implemented as a delete+insert.
///
/// Validating uniqueness and references individually for each row is not
/// performant, but it's fine for our purposes.
pub fn validate_row(&self, row: &Row, update: bool, txn: &impl Transaction) -> Result<()> {
if row.len() != self.columns.len() {
return errinput!("invalid row size for table {}", self.name);
}
// Validate primary key.
let id = &row[self.primary_key];
let idslice = &row[self.primary_key..=self.primary_key];
if id.is_undefined() {
return errinput!("invalid primary key {id}");
}
if !update && !txn.get(&self.name, idslice)?.is_empty() {
return errinput!("primary key {id} already exists");
}
for (i, (column, value)) in self.columns.iter().zip(row).enumerate() {
let (cname, ctype) = (&column.name, &column.datatype);
let valueslice = &row[i..=i];
// Validate datatype.
if let Some(ref vtype) = value.datatype() {
if vtype != ctype {
return errinput!("invalid datatype {vtype} for {ctype} column {cname}");
}
}
if value == &Value::Null && !column.nullable {
return errinput!("NULL value not allowed for column {cname}");
}
// Validate outgoing references.
if let Some(target) = &column.references {
match value {
// NB: NaN is not a valid primary key, and not valid as a
// missing foreign key marker.
Value::Null => {}
v if target == &self.name && v == id => {}
v if txn.get(target, valueslice)?.is_empty() => {
return errinput!("reference {v} not in table {target}");
}
_ => {}
}
}
// Validate uniqueness constraints. Unique columns are indexed.
if column.unique && i != self.primary_key && !value.is_undefined() {
let mut index = txn.lookup_index(&self.name, &column.name, valueslice)?;
if update {
index.remove(id); // ignore existing version of this row
}
if !index.is_empty() {
return errinput!("value {value} already in unique column {cname}");
}
}
}
Ok(())
}

Expressions

During SQL execution, we also have to model expressions, such as 1 + 2 * 3. These are represented as values and operations on them, and can be nested as a tree to represent compound operations.

/// An expression, made up of nested operations and values. Values are either
/// constants, or numeric column references which are looked up in rows.
/// Evaluated to a final value during query execution.
///
/// Since this is a recursive data structure, we have to box each child
/// expression, which incurs a heap allocation per expression node. There are
/// clever ways to avoid this, but we keep it simple.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub enum Expression {
/// A constant value.
Constant(Value),
/// A column reference. Looks up the value in a row during evaluation.
Column(usize),
/// a AND b: logical AND of two booleans.
And(Box<Expression>, Box<Expression>),
/// a OR b: logical OR of two booleans.
Or(Box<Expression>, Box<Expression>),
/// NOT a: logical NOT of a boolean.
Not(Box<Expression>),
/// a = b: equality comparison of two values.
Equal(Box<Expression>, Box<Expression>),
/// Greater than comparison of two values: a > b.
GreaterThan(Box<Expression>, Box<Expression>),
/// a < b: less than comparison of two values.
LessThan(Box<Expression>, Box<Expression>),
/// a IS NULL or a IS NAN: checks for the given value.
Is(Box<Expression>, Value),
/// a + b: adds two numbers.
Add(Box<Expression>, Box<Expression>),
/// a / b: divides two numbers.
Divide(Box<Expression>, Box<Expression>),
/// a ^b: exponentiates two numbers.
Exponentiate(Box<Expression>, Box<Expression>),
/// a!: takes the factorial of a number (4! = 4*3*2*1).
Factorial(Box<Expression>),
/// +a: the identify function, which simply returns the same number.
Identity(Box<Expression>),
/// a * b: multiplies two numbers.
Multiply(Box<Expression>, Box<Expression>),
/// -a: negates the given number.
Negate(Box<Expression>),
/// a % b: the remainder after dividing two numbers.
Remainder(Box<Expression>, Box<Expression>),
/// √a: takes the square root of a number.
SquareRoot(Box<Expression>),
/// a - b: subtracts two numbers.
Subtract(Box<Expression>, Box<Expression>),
// a LIKE b: checks if a string matches a pattern.
Like(Box<Expression>, Box<Expression>),
}

For example, the expression 1 + 2 * 3 (taking precedence into account) is represented as:

//    +
//   / \
//  1   *
//     /  \
//    2    3
Expression::Add(
    Expression::Constant(Value::Integer(1)),
    Expression::Multiply(
        Expression::Constant(Value::Integer(2)),
        Expression::Constant(Value::Integer(3)),
    ),
)

An Expression can contain two kinds of values: constant values as Expression::Constant(sql::types::Value), and dynamic values as Expression::Column(usize) column references. The latter will fetch a sql::types::Value from a sql::types::Row at the specified index during evaluation.

We'll see later how the SQL parser and planner transforms text expression like 1 + 2 * 3 into an Expression, and how it resolves column names to row indexes like price * 0.25 to row[3] * 0.25.

Expressions are evaluated recursively via Expression::evalute(), given a sql::types::Row with input values for column references, and return a final sql::types::Value result:

/// Evaluates an expression, returning a constant value. Column references
/// are looked up in the given row (or panic if the row is None).
pub fn evaluate(&self, row: Option<&Row>) -> Result<Value> {
use Value::*;
Ok(match self {
// Constant values return themselves.
Self::Constant(value) => value.clone(),
// Column references look up a row value. The planner ensures that
// only constant expressions are evaluated without a row.
Self::Column(index) => row.and_then(|r| r.get(*index)).cloned().expect("invalid index"),
// Logical AND. Inputs must be boolean or NULL. NULLs generally
// yield NULL, except the special case NULL AND false == false.
Self::And(lhs, rhs) => match (lhs.evaluate(row)?, rhs.evaluate(row)?) {
(Boolean(lhs), Boolean(rhs)) => Boolean(lhs && rhs),
(Boolean(b), Null) | (Null, Boolean(b)) if !b => Boolean(false),
(Boolean(_), Null) | (Null, Boolean(_)) | (Null, Null) => Null,
(lhs, rhs) => return errinput!("can't AND {lhs} and {rhs}"),
},
// Logical OR. Inputs must be boolean or NULL. NULLs generally
// yield NULL, except the special case NULL OR true == true.
Self::Or(lhs, rhs) => match (lhs.evaluate(row)?, rhs.evaluate(row)?) {
(Boolean(lhs), Boolean(rhs)) => Boolean(lhs || rhs),
(Boolean(b), Null) | (Null, Boolean(b)) if b => Boolean(true),
(Boolean(_), Null) | (Null, Boolean(_)) | (Null, Null) => Null,
(lhs, rhs) => return errinput!("can't OR {lhs} and {rhs}"),
},
// Logical NOT. Input must be boolean or NULL.
Self::Not(expr) => match expr.evaluate(row)? {
Boolean(b) => Boolean(!b),
Null => Null,
value => return errinput!("can't NOT {value}"),
},
// Comparisons. Must be of same type, except floats and integers
// which are interchangeable. NULLs yield NULL, NaNs yield NaN.
//
// Does not dispatch to Value.cmp() because comparison and sorting
// is different for Nulls and NaNs in SQL and code.
#[allow(clippy::float_cmp)]
Self::Equal(lhs, rhs) => match (lhs.evaluate(row)?, rhs.evaluate(row)?) {
(Boolean(lhs), Boolean(rhs)) => Boolean(lhs == rhs),
(Integer(lhs), Integer(rhs)) => Boolean(lhs == rhs),
(Integer(lhs), Float(rhs)) => Boolean(lhs as f64 == rhs),
(Float(lhs), Integer(rhs)) => Boolean(lhs == rhs as f64),
(Float(lhs), Float(rhs)) => Boolean(lhs == rhs),
(String(lhs), String(rhs)) => Boolean(lhs == rhs),
(Null, _) | (_, Null) => Null,
(lhs, rhs) => return errinput!("can't compare {lhs} and {rhs}"),
},
Self::GreaterThan(lhs, rhs) => match (lhs.evaluate(row)?, rhs.evaluate(row)?) {
#[allow(clippy::bool_comparison)]
(Boolean(lhs), Boolean(rhs)) => Boolean(lhs > rhs),
(Integer(lhs), Integer(rhs)) => Boolean(lhs > rhs),
(Integer(lhs), Float(rhs)) => Boolean(lhs as f64 > rhs),
(Float(lhs), Integer(rhs)) => Boolean(lhs > rhs as f64),
(Float(lhs), Float(rhs)) => Boolean(lhs > rhs),
(String(lhs), String(rhs)) => Boolean(lhs > rhs),
(Null, _) | (_, Null) => Null,
(lhs, rhs) => return errinput!("can't compare {lhs} and {rhs}"),
},
Self::LessThan(lhs, rhs) => match (lhs.evaluate(row)?, rhs.evaluate(row)?) {
#[allow(clippy::bool_comparison)]
(Boolean(lhs), Boolean(rhs)) => Boolean(lhs < rhs),
(Integer(lhs), Integer(rhs)) => Boolean(lhs < rhs),
(Integer(lhs), Float(rhs)) => Boolean((lhs as f64) < rhs),
(Float(lhs), Integer(rhs)) => Boolean(lhs < rhs as f64),
(Float(lhs), Float(rhs)) => Boolean(lhs < rhs),
(String(lhs), String(rhs)) => Boolean(lhs < rhs),
(Null, _) | (_, Null) => Null,
(lhs, rhs) => return errinput!("can't compare {lhs} and {rhs}"),
},
Self::Is(expr, Null) => Boolean(expr.evaluate(row)? == Null),
Self::Is(expr, Float(f)) if f.is_nan() => match expr.evaluate(row)? {
Float(f) => Boolean(f.is_nan()),
Null => Null,
v => return errinput!("IS NAN can't be used with {}", v.datatype().unwrap()),
},
Self::Is(_, v) => panic!("invalid IS value {v}"), // enforced by parser
// Mathematical operations. Inputs must be numbers, but integers and
// floats are interchangeable (float when mixed). NULLs yield NULL.
// Errors on integer overflow, but floats yield infinity or NaN.
Self::Add(lhs, rhs) => lhs.evaluate(row)?.checked_add(&rhs.evaluate(row)?)?,
Self::Divide(lhs, rhs) => lhs.evaluate(row)?.checked_div(&rhs.evaluate(row)?)?,
Self::Exponentiate(lhs, rhs) => lhs.evaluate(row)?.checked_pow(&rhs.evaluate(row)?)?,
Self::Factorial(expr) => match expr.evaluate(row)? {
Integer(i @ 0..) => {
(1..=i).try_fold(Integer(1), |p, i| p.checked_mul(&Integer(i)))?
}
Null => Null,
value => return errinput!("can't take factorial of {value}"),
},
Self::Identity(expr) => match expr.evaluate(row)? {
value @ (Integer(_) | Float(_) | Null) => value,
expr => return errinput!("can't take the identity of {expr}"),
},
Self::Multiply(lhs, rhs) => lhs.evaluate(row)?.checked_mul(&rhs.evaluate(row)?)?,
Self::Negate(expr) => match expr.evaluate(row)? {
Integer(i) => Integer(-i),
Float(f) => Float(-f),
Null => Null,
value => return errinput!("can't negate {value}"),
},
Self::Remainder(lhs, rhs) => lhs.evaluate(row)?.checked_rem(&rhs.evaluate(row)?)?,
Self::SquareRoot(expr) => match expr.evaluate(row)? {
Integer(i @ 0..) => Float((i as f64).sqrt()),
Float(f) => Float(f.sqrt()),
Null => Null,
value => return errinput!("can't take square root of {value}"),
},
Self::Subtract(lhs, rhs) => lhs.evaluate(row)?.checked_sub(&rhs.evaluate(row)?)?,
// LIKE pattern matching, using _ and % as single- and
// multi-character wildcards. Inputs must be strings. NULLs yield
// NULL. There's no support for escaping an _ and %.
Self::Like(lhs, rhs) => match (lhs.evaluate(row)?, rhs.evaluate(row)?) {
(String(lhs), String(rhs)) => {
// We could precompile the pattern if it's constant, instead
// of recompiling it for every row, but we keep it simple.
let pattern =
format!("^{}$", regex::escape(&rhs).replace('%', ".*").replace('_', "."));
Boolean(Regex::new(&pattern)?.is_match(&lhs))
}
(String(_), Null) | (Null, String(_)) | (Null, Null) => Null,
(lhs, rhs) => return errinput!("can't LIKE {lhs} and {rhs}"),
},
})
}

Many of the comparison operations like == are implemented explicitly here instead of using sql::types::Value comparisons. This is where we implement the SQL semantics of special values like NULL, such that NULL = NULL yields NULL instead of TRUE.

For mathematical operations however, we generally dispatch to these methods on sql::types::Value:

/// Adds two values. Errors if invalid.
pub fn checked_add(&self, other: &Self) -> Result<Self> {
use Value::*;
Ok(match (self, other) {
(Integer(lhs), Integer(rhs)) => match lhs.checked_add(*rhs) {
Some(i) => Integer(i),
None => return errinput!("integer overflow"),
},
(Integer(lhs), Float(rhs)) => Float(*lhs as f64 + rhs),
(Float(lhs), Integer(rhs)) => Float(lhs + *rhs as f64),
(Float(lhs), Float(rhs)) => Float(lhs + rhs),
(Null, Integer(_) | Float(_) | Null) => Null,
(Integer(_) | Float(_), Null) => Null,
(lhs, rhs) => return errinput!("can't add {lhs} and {rhs}"),
})
}
/// Divides two values. Errors if invalid.
pub fn checked_div(&self, other: &Self) -> Result<Self> {
use Value::*;
Ok(match (self, other) {
(Integer(_), Integer(0)) => return errinput!("can't divide by zero"),
(Integer(lhs), Integer(rhs)) => Integer(lhs / rhs),
(Integer(lhs), Float(rhs)) => Float(*lhs as f64 / rhs),
(Float(lhs), Integer(rhs)) => Float(lhs / *rhs as f64),
(Float(lhs), Float(rhs)) => Float(lhs / rhs),
(Null, Integer(_) | Float(_) | Null) => Null,
(Integer(_) | Float(_), Null) => Null,
(lhs, rhs) => return errinput!("can't divide {lhs} and {rhs}"),
})
}
/// Multiplies two values. Errors if invalid.
pub fn checked_mul(&self, other: &Self) -> Result<Self> {
use Value::*;
Ok(match (self, other) {
(Integer(lhs), Integer(rhs)) => match lhs.checked_mul(*rhs) {
Some(i) => Integer(i),
None => return errinput!("integer overflow"),
},
(Integer(lhs), Float(rhs)) => Float(*lhs as f64 * rhs),
(Float(lhs), Integer(rhs)) => Float(lhs * *rhs as f64),
(Float(lhs), Float(rhs)) => Float(lhs * rhs),
(Null, Integer(_) | Float(_) | Null) => Null,
(Integer(_) | Float(_), Null) => Null,
(lhs, rhs) => return errinput!("can't multiply {lhs} and {rhs}"),
})
}
/// Exponentiates two values. Errors if invalid.
pub fn checked_pow(&self, other: &Self) -> Result<Self> {
use Value::*;
Ok(match (self, other) {
(Integer(lhs), Integer(rhs)) if *rhs >= 0 => {
let rhs = (*rhs).try_into().or_else(|_| errinput!("integer overflow"))?;
match lhs.checked_pow(rhs) {
Some(i) => Integer(i),
None => return errinput!("integer overflow"),
}
}
(Integer(lhs), Integer(rhs)) => Float((*lhs as f64).powf(*rhs as f64)),
(Integer(lhs), Float(rhs)) => Float((*lhs as f64).powf(*rhs)),
(Float(lhs), Integer(rhs)) => Float((lhs).powi(*rhs as i32)),
(Float(lhs), Float(rhs)) => Float((lhs).powf(*rhs)),
(Integer(_) | Float(_), Null) => Null,
(Null, Integer(_) | Float(_) | Null) => Null,
(lhs, rhs) => return errinput!("can't exponentiate {lhs} and {rhs}"),
})
}
/// Finds the remainder of two values. Errors if invalid.
///
/// NB: uses the remainder, not modulo, like Postgres. This means that for
/// negative values, the result has the sign of the dividend, rather than
/// always returning a positive value.
pub fn checked_rem(&self, other: &Self) -> Result<Self> {
use Value::*;
Ok(match (self, other) {
(Integer(_), Integer(0)) => return errinput!("can't divide by zero"),
(Integer(lhs), Integer(rhs)) => Integer(lhs % rhs),
(Integer(lhs), Float(rhs)) => Float(*lhs as f64 % rhs),
(Float(lhs), Integer(rhs)) => Float(lhs % *rhs as f64),
(Float(lhs), Float(rhs)) => Float(lhs % rhs),
(Integer(_) | Float(_) | Null, Null) => Null,
(Null, Integer(_) | Float(_)) => Null,
(lhs, rhs) => return errinput!("can't take remainder of {lhs} and {rhs}"),
})
}
/// Subtracts two values. Errors if invalid.
pub fn checked_sub(&self, other: &Self) -> Result<Self> {
use Value::*;
Ok(match (self, other) {
(Integer(lhs), Integer(rhs)) => match lhs.checked_sub(*rhs) {
Some(i) => Integer(i),
None => return errinput!("integer overflow"),
},
(Integer(lhs), Float(rhs)) => Float(*lhs as f64 - rhs),
(Float(lhs), Integer(rhs)) => Float(lhs - *rhs as f64),
(Float(lhs), Float(rhs)) => Float(lhs - rhs),
(Null, Integer(_) | Float(_) | Null) => Null,
(Integer(_) | Float(_), Null) => Null,
(lhs, rhs) => return errinput!("can't subtract {lhs} and {rhs}"),
})
}

Expression parsing and evaluation is tested via test scripts in sql/testscripts/expression.


SQL Engine   |   SQL Storage