The SQL data model represents user data in tables and rows. It is made up of data types and schemas,
in the sql::types
module.
toyDB supports four basic scalar data types as sql::types::DataType: booleans, integers, floats,
and strings.
Lines 15 to 27 in b2fe7b7
| /// A primitive SQL data type. For simplicity, only a handful of scalar types | |
| /// are supported (no compound types). | |
| #[derive(Clone, Copy, Debug, Hash, PartialEq, Serialize, Deserialize)] | |
| pub enum DataType { | |
| /// A boolean: true or false. | |
| Boolean, | |
| /// A 64-bit signed integer. | |
| Integer, | |
| /// A 64-bit floating point number. | |
| Float, | |
| /// A UTF-8 encoded string. | |
| String, | |
| } |
Specific values are represented as sql::types::Value, using the corresponding Rust types. toyDB
also supports SQL NULL values, i.e. unknown values, following the rules of
three-valued logic.
Lines 40 to 64 in b2fe7b7
| /// A primitive SQL value, represented as a native Rust type. | |
| #[derive(Clone, Debug, Serialize, Deserialize)] | |
| pub enum Value { | |
| /// An unknown value of unknown type (i.e. SQL NULL). | |
| /// | |
| /// In code, Null is considered equal to Null, so that we can detect, index, | |
| /// and order these values. The SQL NULL semantics are implemented during | |
| /// Expression evaluation. | |
| Null, | |
| /// A boolean. | |
| Boolean(bool), | |
| /// A 64-bit signed integer. | |
| Integer(i64), | |
| /// A 64-bit floating point number. | |
| /// | |
| /// In code, NaN is considered equal to NaN, so that we can detect, index, | |
| /// and order these values. The SQL NAN semantics are implemented during | |
| /// Expression evaluation. | |
| /// | |
| /// -0.0 and -NaN are considered equal to their positive counterpart, and | |
| /// normalized as positive when serialized (for key lookups). | |
| Float(#[serde(serialize_with = "serialize_f64")] f64), | |
| /// A UTF-8 encoded string. | |
| String(String), | |
| } |
The Value type provides basic formatting, conversion, and mathematical operations.
Lines 68 to 79 in 686d397
| impl Display for Value { | |
| fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { | |
| match self { | |
| Self::Null => f.write_str("NULL"), | |
| Self::Boolean(true) => f.write_str("TRUE"), | |
| Self::Boolean(false) => f.write_str("FALSE"), | |
| Self::Integer(integer) => integer.fmt(f), | |
| Self::Float(float) => write!(f, "{float:?}"), | |
| Self::String(string) => write!(f, "'{}'", string.escape_debug()), | |
| } | |
| } | |
| } |
Lines 164 to 370 in 686d397
| impl Value { | |
| /// Returns the value's datatype, or None for null values. | |
| pub fn datatype(&self) -> Option<DataType> { | |
| match self { | |
| Self::Null => None, | |
| Self::Boolean(_) => Some(DataType::Boolean), | |
| Self::Integer(_) => Some(DataType::Integer), | |
| Self::Float(_) => Some(DataType::Float), | |
| Self::String(_) => Some(DataType::String), | |
| } | |
| } | |
| /// Returns true if the value is undefined (NULL or NaN). | |
| pub fn is_undefined(&self) -> bool { | |
| match self { | |
| Self::Null => true, | |
| Self::Float(f) if f.is_nan() => true, | |
| _ => false, | |
| } | |
| } | |
| /// Adds two values. Errors if invalid. | |
| pub fn checked_add(&self, other: &Self) -> Result<Self> { | |
| use Value::*; | |
| Ok(match (self, other) { | |
| (Integer(lhs), Integer(rhs)) => match lhs.checked_add(*rhs) { | |
| Some(i) => Integer(i), | |
| None => return errinput!("integer overflow"), | |
| }, | |
| (Integer(lhs), Float(rhs)) => Float(*lhs as f64 + rhs), | |
| (Float(lhs), Integer(rhs)) => Float(lhs + *rhs as f64), | |
| (Float(lhs), Float(rhs)) => Float(lhs + rhs), | |
| (Null, Integer(_) | Float(_) | Null) => Null, | |
| (Integer(_) | Float(_), Null) => Null, | |
| (lhs, rhs) => return errinput!("can't add {lhs} and {rhs}"), | |
| }) | |
| } | |
| /// Divides two values. Errors if invalid. | |
| pub fn checked_div(&self, other: &Self) -> Result<Self> { | |
| use Value::*; | |
| Ok(match (self, other) { | |
| (Integer(_), Integer(0)) => return errinput!("can't divide by zero"), | |
| (Integer(lhs), Integer(rhs)) => Integer(lhs / rhs), | |
| (Integer(lhs), Float(rhs)) => Float(*lhs as f64 / rhs), | |
| (Float(lhs), Integer(rhs)) => Float(lhs / *rhs as f64), | |
| (Float(lhs), Float(rhs)) => Float(lhs / rhs), | |
| (Null, Integer(_) | Float(_) | Null) => Null, | |
| (Integer(_) | Float(_), Null) => Null, | |
| (lhs, rhs) => return errinput!("can't divide {lhs} and {rhs}"), | |
| }) | |
| } | |
| /// Multiplies two values. Errors if invalid. | |
| pub fn checked_mul(&self, other: &Self) -> Result<Self> { | |
| use Value::*; | |
| Ok(match (self, other) { | |
| (Integer(lhs), Integer(rhs)) => match lhs.checked_mul(*rhs) { | |
| Some(i) => Integer(i), | |
| None => return errinput!("integer overflow"), | |
| }, | |
| (Integer(lhs), Float(rhs)) => Float(*lhs as f64 * rhs), | |
| (Float(lhs), Integer(rhs)) => Float(lhs * *rhs as f64), | |
| (Float(lhs), Float(rhs)) => Float(lhs * rhs), | |
| (Null, Integer(_) | Float(_) | Null) => Null, | |
| (Integer(_) | Float(_), Null) => Null, | |
| (lhs, rhs) => return errinput!("can't multiply {lhs} and {rhs}"), | |
| }) | |
| } | |
| /// Exponentiates two values. Errors if invalid. | |
| pub fn checked_pow(&self, other: &Self) -> Result<Self> { | |
| use Value::*; | |
| Ok(match (self, other) { | |
| (Integer(lhs), Integer(rhs)) if *rhs >= 0 => { | |
| let rhs = (*rhs).try_into().or_else(|_| errinput!("integer overflow"))?; | |
| match lhs.checked_pow(rhs) { | |
| Some(i) => Integer(i), | |
| None => return errinput!("integer overflow"), | |
| } | |
| } | |
| (Integer(lhs), Integer(rhs)) => Float((*lhs as f64).powf(*rhs as f64)), | |
| (Integer(lhs), Float(rhs)) => Float((*lhs as f64).powf(*rhs)), | |
| (Float(lhs), Integer(rhs)) => Float((lhs).powi(*rhs as i32)), | |
| (Float(lhs), Float(rhs)) => Float((lhs).powf(*rhs)), | |
| (Integer(_) | Float(_), Null) => Null, | |
| (Null, Integer(_) | Float(_) | Null) => Null, | |
| (lhs, rhs) => return errinput!("can't exponentiate {lhs} and {rhs}"), | |
| }) | |
| } | |
| /// Finds the remainder of two values. Errors if invalid. | |
| /// | |
| /// NB: uses the remainder, not modulo, like Postgres. This means that for | |
| /// negative values, the result has the sign of the dividend, rather than | |
| /// always returning a positive value. | |
| pub fn checked_rem(&self, other: &Self) -> Result<Self> { | |
| use Value::*; | |
| Ok(match (self, other) { | |
| (Integer(_), Integer(0)) => return errinput!("can't divide by zero"), | |
| (Integer(lhs), Integer(rhs)) => Integer(lhs % rhs), | |
| (Integer(lhs), Float(rhs)) => Float(*lhs as f64 % rhs), | |
| (Float(lhs), Integer(rhs)) => Float(lhs % *rhs as f64), | |
| (Float(lhs), Float(rhs)) => Float(lhs % rhs), | |
| (Integer(_) | Float(_) | Null, Null) => Null, | |
| (Null, Integer(_) | Float(_)) => Null, | |
| (lhs, rhs) => return errinput!("can't take remainder of {lhs} and {rhs}"), | |
| }) | |
| } | |
| /// Subtracts two values. Errors if invalid. | |
| pub fn checked_sub(&self, other: &Self) -> Result<Self> { | |
| use Value::*; | |
| Ok(match (self, other) { | |
| (Integer(lhs), Integer(rhs)) => match lhs.checked_sub(*rhs) { | |
| Some(i) => Integer(i), | |
| None => return errinput!("integer overflow"), | |
| }, | |
| (Integer(lhs), Float(rhs)) => Float(*lhs as f64 - rhs), | |
| (Float(lhs), Integer(rhs)) => Float(lhs - *rhs as f64), | |
| (Float(lhs), Float(rhs)) => Float(lhs - rhs), | |
| (Null, Integer(_) | Float(_) | Null) => Null, | |
| (Integer(_) | Float(_), Null) => Null, | |
| (lhs, rhs) => return errinput!("can't subtract {lhs} and {rhs}"), | |
| }) | |
| } | |
| } | |
| impl From<bool> for Value { | |
| fn from(v: bool) -> Self { | |
| Value::Boolean(v) | |
| } | |
| } | |
| impl From<f64> for Value { | |
| fn from(v: f64) -> Self { | |
| Value::Float(v) | |
| } | |
| } | |
| impl From<i64> for Value { | |
| fn from(v: i64) -> Self { | |
| Value::Integer(v) | |
| } | |
| } | |
| impl From<String> for Value { | |
| fn from(v: String) -> Self { | |
| Value::String(v) | |
| } | |
| } | |
| impl From<&str> for Value { | |
| fn from(v: &str) -> Self { | |
| Value::String(v.to_owned()) | |
| } | |
| } | |
| impl TryFrom<Value> for bool { | |
| type Error = Error; | |
| fn try_from(value: Value) -> Result<Self> { | |
| let Value::Boolean(b) = value else { | |
| return errdata!("not a boolean: {value}"); | |
| }; | |
| Ok(b) | |
| } | |
| } | |
| impl TryFrom<Value> for f64 { | |
| type Error = Error; | |
| fn try_from(value: Value) -> Result<Self> { | |
| let Value::Float(f) = value else { | |
| return errdata!("not a float: {value}"); | |
| }; | |
| Ok(f) | |
| } | |
| } | |
| impl TryFrom<Value> for i64 { | |
| type Error = Error; | |
| fn try_from(value: Value) -> Result<Self> { | |
| let Value::Integer(i) = value else { | |
| return errdata!("not an integer: {value}"); | |
| }; | |
| Ok(i) | |
| } | |
| } | |
| impl TryFrom<Value> for String { | |
| type Error = Error; | |
| fn try_from(value: Value) -> Result<Self> { | |
| let Value::String(s) = value else { | |
| return errdata!("not a string: {value}"); | |
| }; | |
| Ok(s) | |
| } | |
| } |
It also specifies comparison and ordering semantics, but these are subtly different from the SQL
semantics. For example, in Rust code Value::Null == Value::Null yields true, while in SQL
NULL = NULL yields NULL. This mismatch is necessary for the Rust code to properly detect and
process Null values, and the desired SQL semantics are implemented during expression evaluation
which we'll cover below.
Lines 91 to 162 in b2fe7b7
| // Consider Nulls and ±NaNs equal. Rust already considers -0.0 == 0.0. | |
| impl PartialEq for Value { | |
| fn eq(&self, other: &Self) -> bool { | |
| match (self, other) { | |
| (Self::Boolean(a), Self::Boolean(b)) => a == b, | |
| (Self::Integer(a), Self::Integer(b)) => a == b, | |
| (Self::Integer(a), Self::Float(b)) => *a as f64 == *b, | |
| (Self::Float(a), Self::Integer(b)) => *a == *b as f64, | |
| (Self::Float(a), Self::Float(b)) => a == b || a.is_nan() && b.is_nan(), | |
| (Self::String(a), Self::String(b)) => a == b, | |
| (Self::Null, Self::Null) => true, | |
| (_, _) => false, | |
| } | |
| } | |
| } | |
| impl Eq for Value {} | |
| // Allow hashing Nulls and floats, and hash -0.0 and -NaN as positive. | |
| impl Hash for Value { | |
| fn hash<H: Hasher>(&self, hasher: &mut H) { | |
| core::mem::discriminant(self).hash(hasher); // hash the type | |
| match self { | |
| Self::Null => {} | |
| Self::Boolean(v) => v.hash(hasher), | |
| Self::Integer(v) => v.hash(hasher), | |
| Self::Float(v) => { | |
| // Hash -NaN and -0.0 as positive. | |
| let mut v = *v; | |
| if (v.is_nan() || v == 0.0) && v.is_sign_negative() { | |
| v = -v; | |
| } | |
| v.to_bits().hash(hasher) | |
| } | |
| Self::String(v) => v.hash(hasher), | |
| } | |
| } | |
| } | |
| // Consider Nulls and NaNs equal when ordering. | |
| // | |
| // We establish a total order across all types, even though mixed types will | |
| // rarely/never come up: String > Integer/Float > Boolean > Null. | |
| impl Ord for Value { | |
| fn cmp(&self, other: &Self) -> Ordering { | |
| match (self, other) { | |
| (Self::Null, Self::Null) => Ordering::Equal, | |
| (Self::Boolean(a), Self::Boolean(b)) => a.cmp(b), | |
| (Self::Integer(a), Self::Integer(b)) => a.cmp(b), | |
| (Self::Integer(a), Self::Float(b)) => (*a as f64).total_cmp(b), | |
| (Self::Float(a), Self::Integer(b)) => a.total_cmp(&(*b as f64)), | |
| (Self::Float(a), Self::Float(b)) => a.total_cmp(b), | |
| (Self::String(a), Self::String(b)) => a.cmp(b), | |
| (Self::Null, _) => Ordering::Less, | |
| (_, Self::Null) => Ordering::Greater, | |
| (Self::Boolean(_), _) => Ordering::Less, | |
| (_, Self::Boolean(_)) => Ordering::Greater, | |
| (Self::Float(_), _) => Ordering::Less, | |
| (_, Self::Float(_)) => Ordering::Greater, | |
| (Self::Integer(_), _) => Ordering::Less, | |
| (_, Self::Integer(_)) => Ordering::Greater, | |
| // String is ordered last. | |
| } | |
| } | |
| } | |
| impl PartialOrd for Value { | |
| fn partial_cmp(&self, other: &Self) -> Option<Ordering> { | |
| Some(self.cmp(other)) | |
| } | |
| } |
During execution, a row of values is represented as sql::types::Row, with multiple rows emitted
via sql::types::Rows row iterators:
Lines 378 to 388 in b2fe7b7
| /// A row of values. | |
| pub type Row = Vec<Value>; | |
| /// A row iterator. | |
| pub type Rows = Box<dyn RowIterator>; | |
| /// A row iterator trait, which requires the iterator to be both clonable and | |
| /// object-safe. Cloning allows resetting an iterator back to an initial state, | |
| /// e.g. for nested loop joins. It uses a blanket implementation, and relies on | |
| /// dyn_clone to allow cloning trait objects. | |
| pub trait RowIterator: Iterator<Item = Result<Row>> + DynClone {} |
toyDB schemas only support tables. There are no named indexes or constraints, and there's only a single unnamed database.
Tables are represented by sql::types::Table:
Lines 12 to 25 in c2b0f7f
| /// A table schema, which specifies its data structure and constraints. | |
| /// | |
| /// Tables can't change after they are created. There is no ALTER TABLE nor | |
| /// CREATE/DROP INDEX, only CREATE TABLE and DROP TABLE. | |
| #[derive(Clone, Debug, PartialEq, Deserialize, Serialize)] | |
| pub struct Table { | |
| /// The table name. Unique identifier for the table. Can't be empty. | |
| pub name: String, | |
| /// The primary key column index. A table must have a primary key, and it | |
| /// can only be a single column. | |
| pub primary_key: usize, | |
| /// The table's columns. Must have at least one. | |
| pub columns: Vec<Column>, | |
| } |
A table is made up of a set of columns, represented by sql::types::Column. These support the data
types described above, along with unique constraints, foreign keys, and secondary indexes.
Lines 29 to 53 in c2b0f7f
| /// A table column. | |
| #[derive(Clone, Debug, PartialEq, Deserialize, Serialize)] | |
| pub struct Column { | |
| /// Column name. Can't be empty. | |
| pub name: String, | |
| /// Column datatype. | |
| pub datatype: DataType, | |
| /// Whether the column allows null values. Not legal for primary keys. | |
| pub nullable: bool, | |
| /// The column's default value. If None, the user must specify an explicit | |
| /// value. Must match the column datatype. Nullable columns require a | |
| /// default (often Null). Null is only a valid default when nullable. | |
| pub default: Option<Value>, | |
| /// Whether the column should only allow unique values (ignoring NULLs). | |
| /// Must be true for a primary key column. Requires index. | |
| pub unique: bool, | |
| /// Whether the column should have a secondary index. Must be false for | |
| /// primary keys, which are the primary index. Must be true for unique or | |
| /// reference columns. | |
| pub index: bool, | |
| /// If set, this column is a foreign key reference to the given table's | |
| /// primary key. Must be of the same type as the target primary key. | |
| /// Requires index. | |
| pub references: Option<String>, | |
| } |
The table name serves as a unique identifier, and can't be changed later. In fact, tables schemas are entirely static: they can only be created or dropped (there are no schema changes).
Table schemas are stored in the catalog, represented by the sql::engine::Catalog trait. We'll
revisit the implementation of this trait in the SQL storage section.
toydb/src/sql/engine/engine.rs
Lines 60 to 79 in 0839215
| /// The catalog stores table schema information. It must be implemented for | |
| /// Transaction, and is thus fully transactional. For simplicity, it only | |
| /// supports creating and dropping tables -- there are no ALTER TABLE schema | |
| /// changes, nor CREATE INDEX. | |
| pub trait Catalog { | |
| /// Creates a new table. Errors if it already exists. | |
| fn create_table(&self, table: Table) -> Result<()>; | |
| /// Drops a table. Errors if it does not exist, unless if_exists is true. | |
| /// Returns true if the table existed and was deleted. | |
| fn drop_table(&self, table: &str, if_exists: bool) -> Result<bool>; | |
| /// Fetches a table schema, or None if it doesn't exist. | |
| fn get_table(&self, table: &str) -> Result<Option<Table>>; | |
| /// Returns a list of all table schemas. | |
| fn list_tables(&self) -> Result<Vec<Table>>; | |
| /// Fetches a table schema, or errors if it does not exist. | |
| fn must_get_table(&self, table: &str) -> Result<Table> { | |
| self.get_table(table)?.ok_or_else(|| errinput!("table {table} does not exist")) | |
| } | |
| } |
Table schemas are validated when created via Table::validate(), which enforces invariants and
internal consistency. It uses the catalog to look up information about other tables, e.g. that
foreign key references point to a valid target column in a different table.
Lines 98 to 170 in c2b0f7f
| /// Validates the table schema, using the catalog to validate foreign key | |
| /// references. | |
| pub fn validate(&self, catalog: &impl Catalog) -> Result<()> { | |
| if self.name.is_empty() { | |
| return errinput!("table name can't be empty"); | |
| } | |
| if self.columns.is_empty() { | |
| return errinput!("table has no columns"); | |
| } | |
| if self.columns.get(self.primary_key).is_none() { | |
| return errinput!("invalid primary key index"); | |
| } | |
| for (i, column) in self.columns.iter().enumerate() { | |
| if column.name.is_empty() { | |
| return errinput!("column name can't be empty"); | |
| } | |
| let (cname, ctype) = (&column.name, &column.datatype); // for formatting convenience | |
| // Validate primary key. | |
| let is_primary_key = i == self.primary_key; | |
| if is_primary_key { | |
| if column.nullable { | |
| return errinput!("primary key {cname} cannot be nullable"); | |
| } | |
| if !column.unique { | |
| return errinput!("primary key {cname} must be unique"); | |
| } | |
| if column.index { | |
| return errinput!("primary key {cname} can't have an index"); | |
| } | |
| } | |
| // Validate default value. | |
| match column.default.as_ref().map(|v| v.datatype()) { | |
| None if column.nullable => { | |
| return errinput!("nullable column {cname} must have a default value"); | |
| } | |
| Some(None) if !column.nullable => { | |
| return errinput!("invalid NULL default for non-nullable column {cname}"); | |
| } | |
| Some(Some(vtype)) if vtype != column.datatype => { | |
| return errinput!("invalid default type {vtype} for {ctype} column {cname}"); | |
| } | |
| Some(_) | None => {} | |
| } | |
| // Validate unique index. | |
| if column.unique && !column.index && !is_primary_key { | |
| return errinput!("unique column {cname} must have a secondary index"); | |
| } | |
| // Validate references. | |
| if let Some(reference) = &column.references { | |
| if !column.index && !is_primary_key { | |
| return errinput!("reference column {cname} must have a secondary index"); | |
| } | |
| let reftype = if reference == &self.name { | |
| self.columns[self.primary_key].datatype | |
| } else if let Some(target) = catalog.get_table(reference)? { | |
| target.columns[target.primary_key].datatype | |
| } else { | |
| return errinput!("unknown table {reference} referenced by column {cname}"); | |
| }; | |
| if column.datatype != reftype { | |
| return errinput!( | |
| "can't reference {reftype} primary key of {reference} from {ctype} column {cname}" | |
| ); | |
| } | |
| } | |
| } | |
| Ok(()) | |
| } |
Table rows are validated via Table::validate_row(), which ensures that a sql::types::Row
conforms to the schema (e.g. that value types match the column data types). It uses a
sql::engine::Transaction to look up other rows in the database, e.g. to check for primary key
conflicts (we'll get back to this later).
Lines 172 to 236 in c2b0f7f
| /// Validates a row, including uniqueness and reference checks using the | |
| /// given transaction. | |
| /// | |
| /// If update is true, the row replaces an existing entry with the same | |
| /// primary key. Otherwise, it is an insert. Primary key changes are | |
| /// implemented as a delete+insert. | |
| /// | |
| /// Validating uniqueness and references individually for each row is not | |
| /// performant, but it's fine for our purposes. | |
| pub fn validate_row(&self, row: &Row, update: bool, txn: &impl Transaction) -> Result<()> { | |
| if row.len() != self.columns.len() { | |
| return errinput!("invalid row size for table {}", self.name); | |
| } | |
| // Validate primary key. | |
| let id = &row[self.primary_key]; | |
| let idslice = &row[self.primary_key..=self.primary_key]; | |
| if id.is_undefined() { | |
| return errinput!("invalid primary key {id}"); | |
| } | |
| if !update && !txn.get(&self.name, idslice)?.is_empty() { | |
| return errinput!("primary key {id} already exists"); | |
| } | |
| for (i, (column, value)) in self.columns.iter().zip(row).enumerate() { | |
| let (cname, ctype) = (&column.name, &column.datatype); | |
| let valueslice = &row[i..=i]; | |
| // Validate datatype. | |
| if let Some(ref vtype) = value.datatype() { | |
| if vtype != ctype { | |
| return errinput!("invalid datatype {vtype} for {ctype} column {cname}"); | |
| } | |
| } | |
| if value == &Value::Null && !column.nullable { | |
| return errinput!("NULL value not allowed for column {cname}"); | |
| } | |
| // Validate outgoing references. | |
| if let Some(target) = &column.references { | |
| match value { | |
| // NB: NaN is not a valid primary key, and not valid as a | |
| // missing foreign key marker. | |
| Value::Null => {} | |
| v if target == &self.name && v == id => {} | |
| v if txn.get(target, valueslice)?.is_empty() => { | |
| return errinput!("reference {v} not in table {target}"); | |
| } | |
| _ => {} | |
| } | |
| } | |
| // Validate uniqueness constraints. Unique columns are indexed. | |
| if column.unique && i != self.primary_key && !value.is_undefined() { | |
| let mut index = txn.lookup_index(&self.name, &column.name, valueslice)?; | |
| if update { | |
| index.remove(id); // ignore existing version of this row | |
| } | |
| if !index.is_empty() { | |
| return errinput!("value {value} already in unique column {cname}"); | |
| } | |
| } | |
| } | |
| Ok(()) | |
| } |
During SQL execution, we also have to model expressions, such as 1 + 2 * 3. These are
represented as values and operations on them, and can be nested as a tree to represent compound
operations.
toydb/src/sql/types/expression.rs
Lines 11 to 64 in 9419bcf
| /// An expression, made up of nested operations and values. Values are either | |
| /// constants, or numeric column references which are looked up in rows. | |
| /// Evaluated to a final value during query execution. | |
| /// | |
| /// Since this is a recursive data structure, we have to box each child | |
| /// expression, which incurs a heap allocation per expression node. There are | |
| /// clever ways to avoid this, but we keep it simple. | |
| #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] | |
| pub enum Expression { | |
| /// A constant value. | |
| Constant(Value), | |
| /// A column reference. Looks up the value in a row during evaluation. | |
| Column(usize), | |
| /// a AND b: logical AND of two booleans. | |
| And(Box<Expression>, Box<Expression>), | |
| /// a OR b: logical OR of two booleans. | |
| Or(Box<Expression>, Box<Expression>), | |
| /// NOT a: logical NOT of a boolean. | |
| Not(Box<Expression>), | |
| /// a = b: equality comparison of two values. | |
| Equal(Box<Expression>, Box<Expression>), | |
| /// Greater than comparison of two values: a > b. | |
| GreaterThan(Box<Expression>, Box<Expression>), | |
| /// a < b: less than comparison of two values. | |
| LessThan(Box<Expression>, Box<Expression>), | |
| /// a IS NULL or a IS NAN: checks for the given value. | |
| Is(Box<Expression>, Value), | |
| /// a + b: adds two numbers. | |
| Add(Box<Expression>, Box<Expression>), | |
| /// a / b: divides two numbers. | |
| Divide(Box<Expression>, Box<Expression>), | |
| /// a ^b: exponentiates two numbers. | |
| Exponentiate(Box<Expression>, Box<Expression>), | |
| /// a!: takes the factorial of a number (4! = 4*3*2*1). | |
| Factorial(Box<Expression>), | |
| /// +a: the identify function, which simply returns the same number. | |
| Identity(Box<Expression>), | |
| /// a * b: multiplies two numbers. | |
| Multiply(Box<Expression>, Box<Expression>), | |
| /// -a: negates the given number. | |
| Negate(Box<Expression>), | |
| /// a % b: the remainder after dividing two numbers. | |
| Remainder(Box<Expression>, Box<Expression>), | |
| /// √a: takes the square root of a number. | |
| SquareRoot(Box<Expression>), | |
| /// a - b: subtracts two numbers. | |
| Subtract(Box<Expression>, Box<Expression>), | |
| // a LIKE b: checks if a string matches a pattern. | |
| Like(Box<Expression>, Box<Expression>), | |
| } |
For example, the expression 1 + 2 * 3 (taking precedence
into account) is represented as:
// +
// / \
// 1 *
// / \
// 2 3
Expression::Add(
Expression::Constant(Value::Integer(1)),
Expression::Multiply(
Expression::Constant(Value::Integer(2)),
Expression::Constant(Value::Integer(3)),
),
)An Expression can contain two kinds of values: constant values as
Expression::Constant(sql::types::Value), and dynamic values as Expression::Column(usize) column
references. The latter will fetch a sql::types::Value from a sql::types::Row at the specified
index during evaluation.
We'll see later how the SQL parser and planner transforms text expression like 1 + 2 * 3 into an
Expression, and how it resolves column names to row indexes like price * 0.25 to
row[3] * 0.25.
Expressions are evaluated recursively via Expression::evalute(), given a sql::types::Row with
input values for column references, and return a final sql::types::Value result:
toydb/src/sql/types/expression.rs
Lines 73 to 208 in 9419bcf
| /// Evaluates an expression, returning a constant value. Column references | |
| /// are looked up in the given row (or panic if the row is None). | |
| pub fn evaluate(&self, row: Option<&Row>) -> Result<Value> { | |
| use Value::*; | |
| Ok(match self { | |
| // Constant values return themselves. | |
| Self::Constant(value) => value.clone(), | |
| // Column references look up a row value. The planner ensures that | |
| // only constant expressions are evaluated without a row. | |
| Self::Column(index) => row.and_then(|r| r.get(*index)).cloned().expect("invalid index"), | |
| // Logical AND. Inputs must be boolean or NULL. NULLs generally | |
| // yield NULL, except the special case NULL AND false == false. | |
| Self::And(lhs, rhs) => match (lhs.evaluate(row)?, rhs.evaluate(row)?) { | |
| (Boolean(lhs), Boolean(rhs)) => Boolean(lhs && rhs), | |
| (Boolean(b), Null) | (Null, Boolean(b)) if !b => Boolean(false), | |
| (Boolean(_), Null) | (Null, Boolean(_)) | (Null, Null) => Null, | |
| (lhs, rhs) => return errinput!("can't AND {lhs} and {rhs}"), | |
| }, | |
| // Logical OR. Inputs must be boolean or NULL. NULLs generally | |
| // yield NULL, except the special case NULL OR true == true. | |
| Self::Or(lhs, rhs) => match (lhs.evaluate(row)?, rhs.evaluate(row)?) { | |
| (Boolean(lhs), Boolean(rhs)) => Boolean(lhs || rhs), | |
| (Boolean(b), Null) | (Null, Boolean(b)) if b => Boolean(true), | |
| (Boolean(_), Null) | (Null, Boolean(_)) | (Null, Null) => Null, | |
| (lhs, rhs) => return errinput!("can't OR {lhs} and {rhs}"), | |
| }, | |
| // Logical NOT. Input must be boolean or NULL. | |
| Self::Not(expr) => match expr.evaluate(row)? { | |
| Boolean(b) => Boolean(!b), | |
| Null => Null, | |
| value => return errinput!("can't NOT {value}"), | |
| }, | |
| // Comparisons. Must be of same type, except floats and integers | |
| // which are interchangeable. NULLs yield NULL, NaNs yield NaN. | |
| // | |
| // Does not dispatch to Value.cmp() because comparison and sorting | |
| // is different for Nulls and NaNs in SQL and code. | |
| #[allow(clippy::float_cmp)] | |
| Self::Equal(lhs, rhs) => match (lhs.evaluate(row)?, rhs.evaluate(row)?) { | |
| (Boolean(lhs), Boolean(rhs)) => Boolean(lhs == rhs), | |
| (Integer(lhs), Integer(rhs)) => Boolean(lhs == rhs), | |
| (Integer(lhs), Float(rhs)) => Boolean(lhs as f64 == rhs), | |
| (Float(lhs), Integer(rhs)) => Boolean(lhs == rhs as f64), | |
| (Float(lhs), Float(rhs)) => Boolean(lhs == rhs), | |
| (String(lhs), String(rhs)) => Boolean(lhs == rhs), | |
| (Null, _) | (_, Null) => Null, | |
| (lhs, rhs) => return errinput!("can't compare {lhs} and {rhs}"), | |
| }, | |
| Self::GreaterThan(lhs, rhs) => match (lhs.evaluate(row)?, rhs.evaluate(row)?) { | |
| #[allow(clippy::bool_comparison)] | |
| (Boolean(lhs), Boolean(rhs)) => Boolean(lhs > rhs), | |
| (Integer(lhs), Integer(rhs)) => Boolean(lhs > rhs), | |
| (Integer(lhs), Float(rhs)) => Boolean(lhs as f64 > rhs), | |
| (Float(lhs), Integer(rhs)) => Boolean(lhs > rhs as f64), | |
| (Float(lhs), Float(rhs)) => Boolean(lhs > rhs), | |
| (String(lhs), String(rhs)) => Boolean(lhs > rhs), | |
| (Null, _) | (_, Null) => Null, | |
| (lhs, rhs) => return errinput!("can't compare {lhs} and {rhs}"), | |
| }, | |
| Self::LessThan(lhs, rhs) => match (lhs.evaluate(row)?, rhs.evaluate(row)?) { | |
| #[allow(clippy::bool_comparison)] | |
| (Boolean(lhs), Boolean(rhs)) => Boolean(lhs < rhs), | |
| (Integer(lhs), Integer(rhs)) => Boolean(lhs < rhs), | |
| (Integer(lhs), Float(rhs)) => Boolean((lhs as f64) < rhs), | |
| (Float(lhs), Integer(rhs)) => Boolean(lhs < rhs as f64), | |
| (Float(lhs), Float(rhs)) => Boolean(lhs < rhs), | |
| (String(lhs), String(rhs)) => Boolean(lhs < rhs), | |
| (Null, _) | (_, Null) => Null, | |
| (lhs, rhs) => return errinput!("can't compare {lhs} and {rhs}"), | |
| }, | |
| Self::Is(expr, Null) => Boolean(expr.evaluate(row)? == Null), | |
| Self::Is(expr, Float(f)) if f.is_nan() => match expr.evaluate(row)? { | |
| Float(f) => Boolean(f.is_nan()), | |
| Null => Null, | |
| v => return errinput!("IS NAN can't be used with {}", v.datatype().unwrap()), | |
| }, | |
| Self::Is(_, v) => panic!("invalid IS value {v}"), // enforced by parser | |
| // Mathematical operations. Inputs must be numbers, but integers and | |
| // floats are interchangeable (float when mixed). NULLs yield NULL. | |
| // Errors on integer overflow, but floats yield infinity or NaN. | |
| Self::Add(lhs, rhs) => lhs.evaluate(row)?.checked_add(&rhs.evaluate(row)?)?, | |
| Self::Divide(lhs, rhs) => lhs.evaluate(row)?.checked_div(&rhs.evaluate(row)?)?, | |
| Self::Exponentiate(lhs, rhs) => lhs.evaluate(row)?.checked_pow(&rhs.evaluate(row)?)?, | |
| Self::Factorial(expr) => match expr.evaluate(row)? { | |
| Integer(i @ 0..) => { | |
| (1..=i).try_fold(Integer(1), |p, i| p.checked_mul(&Integer(i)))? | |
| } | |
| Null => Null, | |
| value => return errinput!("can't take factorial of {value}"), | |
| }, | |
| Self::Identity(expr) => match expr.evaluate(row)? { | |
| value @ (Integer(_) | Float(_) | Null) => value, | |
| expr => return errinput!("can't take the identity of {expr}"), | |
| }, | |
| Self::Multiply(lhs, rhs) => lhs.evaluate(row)?.checked_mul(&rhs.evaluate(row)?)?, | |
| Self::Negate(expr) => match expr.evaluate(row)? { | |
| Integer(i) => Integer(-i), | |
| Float(f) => Float(-f), | |
| Null => Null, | |
| value => return errinput!("can't negate {value}"), | |
| }, | |
| Self::Remainder(lhs, rhs) => lhs.evaluate(row)?.checked_rem(&rhs.evaluate(row)?)?, | |
| Self::SquareRoot(expr) => match expr.evaluate(row)? { | |
| Integer(i @ 0..) => Float((i as f64).sqrt()), | |
| Float(f) => Float(f.sqrt()), | |
| Null => Null, | |
| value => return errinput!("can't take square root of {value}"), | |
| }, | |
| Self::Subtract(lhs, rhs) => lhs.evaluate(row)?.checked_sub(&rhs.evaluate(row)?)?, | |
| // LIKE pattern matching, using _ and % as single- and | |
| // multi-character wildcards. Inputs must be strings. NULLs yield | |
| // NULL. There's no support for escaping an _ and %. | |
| Self::Like(lhs, rhs) => match (lhs.evaluate(row)?, rhs.evaluate(row)?) { | |
| (String(lhs), String(rhs)) => { | |
| // We could precompile the pattern if it's constant, instead | |
| // of recompiling it for every row, but we keep it simple. | |
| let pattern = | |
| format!("^{}$", regex::escape(&rhs).replace('%', ".*").replace('_', ".")); | |
| Boolean(Regex::new(&pattern)?.is_match(&lhs)) | |
| } | |
| (String(_), Null) | (Null, String(_)) | (Null, Null) => Null, | |
| (lhs, rhs) => return errinput!("can't LIKE {lhs} and {rhs}"), | |
| }, | |
| }) | |
| } |
Many of the comparison operations like == are implemented explicitly here instead of using
sql::types::Value comparisons. This is where we implement the SQL semantics of special values like
NULL, such that NULL = NULL yields NULL instead of TRUE.
For mathematical operations however, we generally dispatch to these methods on sql::types::Value:
Lines 185 to 295 in b2fe7b7
| /// Adds two values. Errors if invalid. | |
| pub fn checked_add(&self, other: &Self) -> Result<Self> { | |
| use Value::*; | |
| Ok(match (self, other) { | |
| (Integer(lhs), Integer(rhs)) => match lhs.checked_add(*rhs) { | |
| Some(i) => Integer(i), | |
| None => return errinput!("integer overflow"), | |
| }, | |
| (Integer(lhs), Float(rhs)) => Float(*lhs as f64 + rhs), | |
| (Float(lhs), Integer(rhs)) => Float(lhs + *rhs as f64), | |
| (Float(lhs), Float(rhs)) => Float(lhs + rhs), | |
| (Null, Integer(_) | Float(_) | Null) => Null, | |
| (Integer(_) | Float(_), Null) => Null, | |
| (lhs, rhs) => return errinput!("can't add {lhs} and {rhs}"), | |
| }) | |
| } | |
| /// Divides two values. Errors if invalid. | |
| pub fn checked_div(&self, other: &Self) -> Result<Self> { | |
| use Value::*; | |
| Ok(match (self, other) { | |
| (Integer(_), Integer(0)) => return errinput!("can't divide by zero"), | |
| (Integer(lhs), Integer(rhs)) => Integer(lhs / rhs), | |
| (Integer(lhs), Float(rhs)) => Float(*lhs as f64 / rhs), | |
| (Float(lhs), Integer(rhs)) => Float(lhs / *rhs as f64), | |
| (Float(lhs), Float(rhs)) => Float(lhs / rhs), | |
| (Null, Integer(_) | Float(_) | Null) => Null, | |
| (Integer(_) | Float(_), Null) => Null, | |
| (lhs, rhs) => return errinput!("can't divide {lhs} and {rhs}"), | |
| }) | |
| } | |
| /// Multiplies two values. Errors if invalid. | |
| pub fn checked_mul(&self, other: &Self) -> Result<Self> { | |
| use Value::*; | |
| Ok(match (self, other) { | |
| (Integer(lhs), Integer(rhs)) => match lhs.checked_mul(*rhs) { | |
| Some(i) => Integer(i), | |
| None => return errinput!("integer overflow"), | |
| }, | |
| (Integer(lhs), Float(rhs)) => Float(*lhs as f64 * rhs), | |
| (Float(lhs), Integer(rhs)) => Float(lhs * *rhs as f64), | |
| (Float(lhs), Float(rhs)) => Float(lhs * rhs), | |
| (Null, Integer(_) | Float(_) | Null) => Null, | |
| (Integer(_) | Float(_), Null) => Null, | |
| (lhs, rhs) => return errinput!("can't multiply {lhs} and {rhs}"), | |
| }) | |
| } | |
| /// Exponentiates two values. Errors if invalid. | |
| pub fn checked_pow(&self, other: &Self) -> Result<Self> { | |
| use Value::*; | |
| Ok(match (self, other) { | |
| (Integer(lhs), Integer(rhs)) if *rhs >= 0 => { | |
| let rhs = (*rhs).try_into().or_else(|_| errinput!("integer overflow"))?; | |
| match lhs.checked_pow(rhs) { | |
| Some(i) => Integer(i), | |
| None => return errinput!("integer overflow"), | |
| } | |
| } | |
| (Integer(lhs), Integer(rhs)) => Float((*lhs as f64).powf(*rhs as f64)), | |
| (Integer(lhs), Float(rhs)) => Float((*lhs as f64).powf(*rhs)), | |
| (Float(lhs), Integer(rhs)) => Float((lhs).powi(*rhs as i32)), | |
| (Float(lhs), Float(rhs)) => Float((lhs).powf(*rhs)), | |
| (Integer(_) | Float(_), Null) => Null, | |
| (Null, Integer(_) | Float(_) | Null) => Null, | |
| (lhs, rhs) => return errinput!("can't exponentiate {lhs} and {rhs}"), | |
| }) | |
| } | |
| /// Finds the remainder of two values. Errors if invalid. | |
| /// | |
| /// NB: uses the remainder, not modulo, like Postgres. This means that for | |
| /// negative values, the result has the sign of the dividend, rather than | |
| /// always returning a positive value. | |
| pub fn checked_rem(&self, other: &Self) -> Result<Self> { | |
| use Value::*; | |
| Ok(match (self, other) { | |
| (Integer(_), Integer(0)) => return errinput!("can't divide by zero"), | |
| (Integer(lhs), Integer(rhs)) => Integer(lhs % rhs), | |
| (Integer(lhs), Float(rhs)) => Float(*lhs as f64 % rhs), | |
| (Float(lhs), Integer(rhs)) => Float(lhs % *rhs as f64), | |
| (Float(lhs), Float(rhs)) => Float(lhs % rhs), | |
| (Integer(_) | Float(_) | Null, Null) => Null, | |
| (Null, Integer(_) | Float(_)) => Null, | |
| (lhs, rhs) => return errinput!("can't take remainder of {lhs} and {rhs}"), | |
| }) | |
| } | |
| /// Subtracts two values. Errors if invalid. | |
| pub fn checked_sub(&self, other: &Self) -> Result<Self> { | |
| use Value::*; | |
| Ok(match (self, other) { | |
| (Integer(lhs), Integer(rhs)) => match lhs.checked_sub(*rhs) { | |
| Some(i) => Integer(i), | |
| None => return errinput!("integer overflow"), | |
| }, | |
| (Integer(lhs), Float(rhs)) => Float(*lhs as f64 - rhs), | |
| (Float(lhs), Integer(rhs)) => Float(lhs - *rhs as f64), | |
| (Float(lhs), Float(rhs)) => Float(lhs - rhs), | |
| (Null, Integer(_) | Float(_) | Null) => Null, | |
| (Integer(_) | Float(_), Null) => Null, | |
| (lhs, rhs) => return errinput!("can't subtract {lhs} and {rhs}"), | |
| }) | |
| } |
Expression parsing and evaluation is tested via test scripts in
sql/testscripts/expression.
← SQL Engine | SQL Storage →