diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..2a5fe433 --- /dev/null +++ b/Makefile @@ -0,0 +1,25 @@ +# Helper variables (override on invocation if needed). +CARGO ?= cargo +WASM_PACK ?= wasm-pack +SQLLOGIC_PATH ?= tests/slt/**/*.slt + +.PHONY: test test-wasm test-slt test-all wasm-build + +## Run default Rust tests in the current environment (non-WASM). +test: + $(CARGO) test + +## Build the WebAssembly package (artifact goes to ./pkg). +wasm-build: + $(WASM_PACK) build --release --target nodejs + +## Execute wasm-bindgen tests under Node.js (wasm32 target). +test-wasm: + $(WASM_PACK) test --node --release + +## Run the sqllogictest harness against the configured .slt suite. +test-slt: + $(CARGO) run -p sqllogictest-test -- --path $(SQLLOGIC_PATH) + +## Convenience target to run every suite in sequence. +test-all: test test-wasm test-slt diff --git a/src/execution/dql/index_scan.rs b/src/execution/dql/index_scan.rs index 773e6ece..6660ddac 100644 --- a/src/execution/dql/index_scan.rs +++ b/src/execution/dql/index_scan.rs @@ -4,15 +4,31 @@ use crate::planner::operator::table_scan::TableScanOperator; use crate::storage::{Iter, StatisticsMetaCache, TableCache, Transaction, ViewCache}; use crate::throw; use crate::types::index::IndexMetaRef; +use crate::types::serialize::TupleValueSerializableImpl; pub(crate) struct IndexScan { op: TableScanOperator, index_by: IndexMetaRef, ranges: Vec, + covered_deserializers: Option>, } -impl From<(TableScanOperator, IndexMetaRef, Range)> for IndexScan { - fn from((op, index_by, range): (TableScanOperator, IndexMetaRef, Range)) -> Self { +impl + From<( + TableScanOperator, + IndexMetaRef, + Range, + Option>, + )> for IndexScan +{ + fn from( + (op, index_by, range, covered_deserializers): ( + TableScanOperator, + IndexMetaRef, + Range, + Option>, + ), + ) -> Self { let ranges = match range { Range::SortedRanges(ranges) => ranges, range => vec![range], @@ -22,6 +38,7 @@ impl From<(TableScanOperator, IndexMetaRef, Range)> for IndexScan { op, index_by, ranges, + covered_deserializers, } } } @@ -51,6 +68,7 @@ impl<'a, T: Transaction + 'a> ReadExecutor<'a, T> for IndexScan { self.index_by, self.ranges, with_pk, + self.covered_deserializers, ) ); diff --git a/src/execution/dql/sort.rs b/src/execution/dql/sort.rs index c89ba794..65c95233 100644 --- a/src/execution/dql/sort.rs +++ b/src/execution/dql/sort.rs @@ -183,13 +183,13 @@ impl SortBy { let mut key = BumpBytes::new_in(arena); expr.eval(Some((tuple, schema)))? - .memcomparable_encode(&mut key)?; - if !asc { - for byte in key.iter_mut() { + .memcomparable_encode_with_null_order(&mut key, *nulls_first)?; + + if !asc && key.len() > 1 { + for byte in key.iter_mut().skip(1) { *byte ^= 0xFF; } } - key.push(if *nulls_first { u8::MIN } else { u8::MAX }); full_key.extend(key); } sort_keys.put((i, full_key)) diff --git a/src/execution/dql/top_k.rs b/src/execution/dql/top_k.rs index c735fe31..390bc0a3 100644 --- a/src/execution/dql/top_k.rs +++ b/src/execution/dql/top_k.rs @@ -48,13 +48,12 @@ fn top_sort<'a>( { let mut key = BumpBytes::new_in(arena); expr.eval(Some((&tuple, &**schema)))? - .memcomparable_encode(&mut key)?; - if !asc { - for byte in key.iter_mut() { + .memcomparable_encode_with_null_order(&mut key, *nulls_first)?; + if !asc && key.len() > 1 { + for byte in key.iter_mut().skip(1) { *byte ^= 0xFF; } } - key.push(if *nulls_first { u8::MIN } else { u8::MAX }); full_key.extend(key); } diff --git a/src/execution/mod.rs b/src/execution/mod.rs index 1517722c..0a97198a 100644 --- a/src/execution/mod.rs +++ b/src/execution/mod.rs @@ -124,9 +124,11 @@ pub fn build_read<'a, T: Transaction + 'a>( if let Some(PhysicalOption::IndexScan(IndexInfo { meta, range: Some(range), + covered_deserializers, })) = plan.physical_option { - IndexScan::from((op, meta, range)).execute(cache, transaction) + IndexScan::from((op, meta, range, covered_deserializers)) + .execute(cache, transaction) } else { SeqScan::from(op).execute(cache, transaction) } diff --git a/src/optimizer/core/memo.rs b/src/optimizer/core/memo.rs index 678a0dde..4dcdedf3 100644 --- a/src/optimizer/core/memo.rs +++ b/src/optimizer/core/memo.rs @@ -212,6 +212,7 @@ mod tests { max: Bound::Unbounded, } ])), + covered_deserializers: None, })) ); diff --git a/src/optimizer/rule/implementation/dql/table_scan.rs b/src/optimizer/rule/implementation/dql/table_scan.rs index edb1d28e..91e88019 100644 --- a/src/optimizer/rule/implementation/dql/table_scan.rs +++ b/src/optimizer/rule/implementation/dql/table_scan.rs @@ -79,7 +79,9 @@ impl ImplementationRule for IndexScanImplementation { { let mut row_count = statistics_meta.collect_count(range)?; - if !matches!(index_info.meta.ty, IndexType::PrimaryKey { .. }) { + if index_info.covered_deserializers.is_none() + && !matches!(index_info.meta.ty, IndexType::PrimaryKey { .. }) + { // need to return table query(non-covering index) row_count *= 2; } diff --git a/src/optimizer/rule/normalization/pushdown_predicates.rs b/src/optimizer/rule/normalization/pushdown_predicates.rs index b56948e8..e3333a91 100644 --- a/src/optimizer/rule/normalization/pushdown_predicates.rs +++ b/src/optimizer/rule/normalization/pushdown_predicates.rs @@ -13,9 +13,9 @@ use crate::types::index::{IndexInfo, IndexMetaRef, IndexType}; use crate::types::value::DataValue; use crate::types::LogicalType; use itertools::Itertools; -use std::mem; use std::ops::Bound; use std::sync::LazyLock; +use std::{mem, slice}; static PUSH_PREDICATE_THROUGH_JOIN: LazyLock = LazyLock::new(|| Pattern { predicate: |op| matches!(op, Operator::Filter(_)), @@ -215,12 +215,17 @@ impl NormalizationRule for PushPredicateIntoScan { fn apply(&self, node_id: HepNodeId, graph: &mut HepGraph) -> Result<(), DatabaseError> { if let Operator::Filter(op) = graph.operator(node_id).clone() { if let Some(child_id) = graph.eldest_child_at(node_id) { - if let Operator::TableScan(child_op) = graph.operator_mut(child_id) { - //FIXME: now only support `unique` and `primary key` - for IndexInfo { meta, range } in &mut child_op.index_infos { + if let Operator::TableScan(scan_op) = graph.operator_mut(child_id) { + for IndexInfo { + meta, + range, + covered_deserializers, + } in &mut scan_op.index_infos + { if range.is_some() { continue; } + // range detach *range = match meta.ty { IndexType::PrimaryKey { is_multiple: false } | IndexType::Unique @@ -232,6 +237,28 @@ impl NormalizationRule for PushPredicateIntoScan { Self::composite_range(&op, meta)? } }; + // try index covered + let mut deserializers = Vec::with_capacity(meta.column_ids.len()); + let mut cover_count = 0; + let index_column_types = match &meta.value_ty { + LogicalType::Tuple(tys) => tys, + ty => slice::from_ref(ty), + }; + for (i, column_id) in meta.column_ids.iter().enumerate() { + for column in scan_op.columns.values() { + deserializers.push( + if column.id().map(|id| &id == column_id).unwrap_or(false) { + cover_count += 1; + column.datatype().serializable() + } else { + index_column_types[i].skip_serializable() + }, + ); + } + } + if cover_count == scan_op.columns.len() { + *covered_deserializers = Some(deserializers) + } } } } diff --git a/src/planner/operator/table_scan.rs b/src/planner/operator/table_scan.rs index f93eb521..df9b60af 100644 --- a/src/planner/operator/table_scan.rs +++ b/src/planner/operator/table_scan.rs @@ -48,6 +48,7 @@ impl TableScanOperator { .map(|meta| IndexInfo { meta: meta.clone(), range: None, + covered_deserializers: None, }) .collect_vec(); diff --git a/src/storage/memory.rs b/src/storage/memory.rs index 9056eba6..3a95dab2 100644 --- a/src/storage/memory.rs +++ b/src/storage/memory.rs @@ -218,6 +218,7 @@ mod wasm_tests { max: Bound::Included(DataValue::Int32(2)), }], true, + None, )?; let mut result = Vec::new(); @@ -349,6 +350,7 @@ mod native_tests { max: Bound::Included(DataValue::Int32(2)), }], true, + None, )?; let mut result = Vec::new(); diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 1d89d899..d37f31c7 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -99,8 +99,10 @@ pub trait Transaction: Sized { index_meta: IndexMetaRef, ranges: Vec, with_pk: bool, + covered_deserializers: Option>, ) -> Result, DatabaseError> { debug_assert!(columns.keys().all_unique()); + let values_len = columns.len(); let table = self .table(table_cache, table_name.clone())? @@ -113,9 +115,22 @@ pub trait Transaction: Sized { columns.insert(*i, column.clone()); } } - let (deserializers, remap_pk_indices) = - Self::create_deserializers(&columns, table, with_pk); - let inner = IndexImplEnum::instance(index_meta.ty); + let (inner, deserializers, remap_pk_indices) = + if let Some(deserializers) = covered_deserializers { + ( + IndexImplEnum::Covered(CoveredIndexImpl), + deserializers, + PrimaryKeyRemap::Covered, + ) + } else { + let (deserializers, remap_pk_indices) = + Self::create_deserializers(&columns, table, with_pk); + ( + IndexImplEnum::instance(index_meta.ty), + deserializers, + remap_pk_indices, + ) + }; Ok(IndexIter { offset, @@ -125,7 +140,7 @@ pub trait Transaction: Sized { index_meta, table_name, deserializers, - values_len: columns.len(), + values_len, total_len: table.columns_len(), tx: self, }, @@ -139,7 +154,7 @@ pub trait Transaction: Sized { columns: &BTreeMap, table: &TableCatalog, with_pk: bool, - ) -> (Vec, Option>) { + ) -> (Vec, PrimaryKeyRemap) { let primary_keys_indices = table.primary_keys_indices(); let mut deserializers = Vec::with_capacity(columns.len()); @@ -162,13 +177,17 @@ pub trait Transaction: Sized { deserializers.push(column.datatype().serializable()); last_projection = Some(*projection); } - let remap_pk_indices = with_pk.then(|| { - primary_keys_indices - .iter() - .filter_map(|pk| projections.binary_search(pk).ok()) - .collect_vec() - }); - (deserializers, remap_pk_indices) + let remap_pk = if with_pk { + PrimaryKeyRemap::Indices( + primary_keys_indices + .iter() + .filter_map(|pk| projections.binary_search(pk).ok()) + .collect_vec(), + ) + } else { + PrimaryKeyRemap::None + }; + (deserializers, remap_pk) } fn add_index_meta( @@ -457,10 +476,8 @@ pub trait Transaction: Sized { } }; match index_meta.ty { - IndexType::PrimaryKey { .. } | IndexType::Unique => { - return Err(DatabaseError::InvalidIndex) - } - IndexType::Normal | IndexType::Composite => (), + IndexType::PrimaryKey { .. } => return Err(DatabaseError::InvalidIndex), + IndexType::Unique | IndexType::Normal | IndexType::Composite => (), } let index_id = index_meta.id; @@ -758,15 +775,16 @@ pub trait Transaction: Sized { trait IndexImpl<'bytes, T: Transaction + 'bytes> { fn index_lookup( &self, - bytes: &Bytes, - pk_indices: Option<&[usize]>, + key: &Bytes, + value: &Bytes, + pk_indices: &PrimaryKeyRemap, params: &IndexImplParams, ) -> Result; fn eq_to_res<'a>( &self, value: &DataValue, - pk_indices: Option<&[usize]>, + pk_indices: &PrimaryKeyRemap, params: &IndexImplParams<'a, T>, ) -> Result, DatabaseError>; @@ -783,6 +801,7 @@ enum IndexImplEnum { Unique(UniqueIndexImpl), Normal(NormalIndexImpl), Composite(CompositeIndexImpl), + Covered(CoveredIndexImpl), } impl IndexImplEnum { @@ -800,6 +819,7 @@ struct PrimaryKeyIndexImpl; struct UniqueIndexImpl; struct NormalIndexImpl; struct CompositeIndexImpl; +struct CoveredIndexImpl; struct IndexImplParams<'a, T: Transaction> { index_meta: IndexMetaRef, @@ -832,7 +852,7 @@ impl IndexImplParams<'_, T> { fn get_tuple_by_id( &self, - pk_indices: Option<&[usize]>, + pk_indices: &PrimaryKeyRemap, tuple_id: &TupleId, ) -> Result, DatabaseError> { let key = unsafe { &*self.table_codec() }.encode_tuple_key(self.table_name, tuple_id)?; @@ -860,22 +880,24 @@ enum IndexResult<'a, T: Transaction + 'a> { impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for IndexImplEnum { fn index_lookup( &self, - bytes: &Bytes, - pk_indices: Option<&[usize]>, + key: &Bytes, + value: &Bytes, + pk_indices: &PrimaryKeyRemap, params: &IndexImplParams, ) -> Result { match self { - IndexImplEnum::PrimaryKey(inner) => inner.index_lookup(bytes, pk_indices, params), - IndexImplEnum::Unique(inner) => inner.index_lookup(bytes, pk_indices, params), - IndexImplEnum::Normal(inner) => inner.index_lookup(bytes, pk_indices, params), - IndexImplEnum::Composite(inner) => inner.index_lookup(bytes, pk_indices, params), + IndexImplEnum::PrimaryKey(inner) => inner.index_lookup(key, value, pk_indices, params), + IndexImplEnum::Unique(inner) => inner.index_lookup(key, value, pk_indices, params), + IndexImplEnum::Normal(inner) => inner.index_lookup(key, value, pk_indices, params), + IndexImplEnum::Composite(inner) => inner.index_lookup(key, value, pk_indices, params), + IndexImplEnum::Covered(inner) => inner.index_lookup(key, value, pk_indices, params), } } fn eq_to_res<'a>( &self, value: &DataValue, - pk_indices: Option<&[usize]>, + pk_indices: &PrimaryKeyRemap, params: &IndexImplParams<'a, T>, ) -> Result, DatabaseError> { match self { @@ -883,6 +905,7 @@ impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for IndexImplEnum { IndexImplEnum::Unique(inner) => inner.eq_to_res(value, pk_indices, params), IndexImplEnum::Normal(inner) => inner.eq_to_res(value, pk_indices, params), IndexImplEnum::Composite(inner) => inner.eq_to_res(value, pk_indices, params), + IndexImplEnum::Covered(inner) => inner.eq_to_res(value, pk_indices, params), } } @@ -897,6 +920,7 @@ impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for IndexImplEnum { IndexImplEnum::Unique(inner) => inner.bound_key(params, value, is_upper), IndexImplEnum::Normal(inner) => inner.bound_key(params, value, is_upper), IndexImplEnum::Composite(inner) => inner.bound_key(params, value, is_upper), + IndexImplEnum::Covered(inner) => inner.bound_key(params, value, is_upper), } } } @@ -904,14 +928,15 @@ impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for IndexImplEnum { impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for PrimaryKeyIndexImpl { fn index_lookup( &self, - bytes: &Bytes, - pk_indices: Option<&[usize]>, + _: &Bytes, + value: &Bytes, + pk_indices: &PrimaryKeyRemap, params: &IndexImplParams, ) -> Result { TableCodec::decode_tuple( ¶ms.deserializers, pk_indices, - bytes, + value, params.values_len, params.total_len, ) @@ -920,7 +945,7 @@ impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for PrimaryKeyIndexIm fn eq_to_res<'a>( &self, value: &DataValue, - pk_indices: Option<&[usize]>, + pk_indices: &PrimaryKeyRemap, params: &IndexImplParams<'a, T>, ) -> Result, DatabaseError> { let tuple = params @@ -949,9 +974,10 @@ impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for PrimaryKeyIndexIm } } +#[inline(always)] fn secondary_index_lookup( bytes: &Bytes, - pk_indices: Option<&[usize]>, + pk_indices: &PrimaryKeyRemap, params: &IndexImplParams, ) -> Result { let tuple_id = TableCodec::decode_index(bytes)?; @@ -963,17 +989,18 @@ fn secondary_index_lookup( impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for UniqueIndexImpl { fn index_lookup( &self, - bytes: &Bytes, - pk_indices: Option<&[usize]>, + _: &Bytes, + value: &Bytes, + pk_indices: &PrimaryKeyRemap, params: &IndexImplParams, ) -> Result { - secondary_index_lookup(bytes, pk_indices, params) + secondary_index_lookup(value, pk_indices, params) } fn eq_to_res<'a>( &self, value: &DataValue, - pk_indices: Option<&[usize]>, + pk_indices: &PrimaryKeyRemap, params: &IndexImplParams<'a, T>, ) -> Result, DatabaseError> { let Some(bytes) = params.tx.get(&self.bound_key(params, value, false)?)? else { @@ -1001,26 +1028,21 @@ impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for UniqueIndexImpl { impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for NormalIndexImpl { fn index_lookup( &self, - bytes: &Bytes, - pk_indices: Option<&[usize]>, + _: &Bytes, + value: &Bytes, + pk_indices: &PrimaryKeyRemap, params: &IndexImplParams, ) -> Result { - secondary_index_lookup(bytes, pk_indices, params) + secondary_index_lookup(value, pk_indices, params) } fn eq_to_res<'a>( &self, value: &DataValue, - _: Option<&[usize]>, + _: &PrimaryKeyRemap, params: &IndexImplParams<'a, T>, ) -> Result, DatabaseError> { - let min = self.bound_key(params, value, false)?; - let max = self.bound_key(params, value, true)?; - - let iter = params - .tx - .range(Bound::Included(min), Bound::Included(max))?; - Ok(IndexResult::Scope(iter)) + eq_to_res_scope(self, value, params) } fn bound_key( @@ -1042,26 +1064,21 @@ impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for NormalIndexImpl { impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for CompositeIndexImpl { fn index_lookup( &self, - bytes: &Bytes, - pk_indices: Option<&[usize]>, + _: &Bytes, + value: &Bytes, + pk_indices: &PrimaryKeyRemap, params: &IndexImplParams, ) -> Result { - secondary_index_lookup(bytes, pk_indices, params) + secondary_index_lookup(value, pk_indices, params) } fn eq_to_res<'a>( &self, value: &DataValue, - _: Option<&[usize]>, + _: &PrimaryKeyRemap, params: &IndexImplParams<'a, T>, ) -> Result, DatabaseError> { - let min = self.bound_key(params, value, false)?; - let max = self.bound_key(params, value, true)?; - - let iter = params - .tx - .range(Bound::Included(min), Bound::Included(max))?; - Ok(IndexResult::Scope(iter)) + eq_to_res_scope(self, value, params) } fn bound_key( @@ -1080,10 +1097,73 @@ impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for CompositeIndexImp } } +impl<'bytes, T: Transaction + 'bytes> IndexImpl<'bytes, T> for CoveredIndexImpl { + fn index_lookup( + &self, + key: &Bytes, + value: &Bytes, + pk_indices: &PrimaryKeyRemap, + params: &IndexImplParams, + ) -> Result { + let key = TableCodec::decode_index_key(key, params.value_ty())?; + + let mut tuple_id = None; + if matches!(pk_indices, PrimaryKeyRemap::Covered) { + tuple_id = Some(TableCodec::decode_index(value)?); + } + let values = match key { + DataValue::Tuple(vals, _) => vals, + v => { + vec![v] + } + }; + Ok(Tuple::new(tuple_id, values)) + } + + fn eq_to_res<'a>( + &self, + value: &DataValue, + _: &PrimaryKeyRemap, + params: &IndexImplParams<'a, T>, + ) -> Result, DatabaseError> { + eq_to_res_scope(self, value, params) + } + + fn bound_key( + &self, + params: &IndexImplParams, + value: &DataValue, + is_upper: bool, + ) -> Result, DatabaseError> { + let index = Index::new(params.index_meta.id, value, params.index_meta.ty); + + unsafe { &*params.table_codec() }.encode_index_bound_key( + params.table_name, + &index, + is_upper, + ) + } +} + +#[inline(always)] +fn eq_to_res_scope<'a, T: Transaction + 'a>( + index_impl: &impl IndexImpl<'a, T>, + value: &DataValue, + params: &IndexImplParams<'a, T>, +) -> Result, DatabaseError> { + let min = index_impl.bound_key(params, value, false)?; + let max = index_impl.bound_key(params, value, true)?; + + let iter = params + .tx + .range(Bound::Included(min), Bound::Included(max))?; + Ok(IndexResult::Scope(iter)) +} + pub struct TupleIter<'a, T: Transaction + 'a> { offset: usize, limit: Option, - remap_pk_indices: Option>, + remap_pk_indices: PrimaryKeyRemap, deserializers: Vec, values_len: usize, total_len: usize, @@ -1109,7 +1189,7 @@ impl<'a, T: Transaction + 'a> Iter for TupleIter<'a, T> { } let tuple = TableCodec::decode_tuple( &self.deserializers, - self.remap_pk_indices.as_deref(), + &self.remap_pk_indices, &value, self.values_len, self.total_len, @@ -1122,11 +1202,17 @@ impl<'a, T: Transaction + 'a> Iter for TupleIter<'a, T> { } } +pub enum PrimaryKeyRemap { + None, + Covered, + Indices(Vec), +} + pub struct IndexIter<'a, T: Transaction> { offset: usize, limit: Option, - remap_pk_indices: Option>, + remap_pk_indices: PrimaryKeyRemap, params: IndexImplParams<'a, T>, inner: IndexImplEnum, // for buffering data @@ -1230,7 +1316,7 @@ impl Iter for IndexIter<'_, T> { match self.inner.eq_to_res( &val, - self.remap_pk_indices.as_deref(), + &self.remap_pk_indices, &self.params, )? { IndexResult::Tuple(tuple) => { @@ -1249,14 +1335,15 @@ impl Iter for IndexIter<'_, T> { } } IndexIterState::Range(iter) => { - while let Some((_, bytes)) = iter.try_next()? { + while let Some((key, value)) = iter.try_next()? { if Self::offset_move(&mut self.offset) { continue; } Self::limit_sub(&mut self.limit); let tuple = self.inner.index_lookup( - &bytes, - self.remap_pk_indices.as_deref(), + &key, + &value, + &self.remap_pk_indices, &self.params, )?; @@ -1667,6 +1754,7 @@ mod test { max: Bound::Unbounded, }], true, + None, ) } diff --git a/src/storage/rocksdb.rs b/src/storage/rocksdb.rs index c705d763..cd3de784 100644 --- a/src/storage/rocksdb.rs +++ b/src/storage/rocksdb.rs @@ -230,7 +230,7 @@ mod test { use crate::storage::rocksdb::RocksStorage; use crate::storage::{ IndexImplEnum, IndexImplParams, IndexIter, IndexIterState, Iter, PrimaryKeyIndexImpl, - Storage, Transaction, + PrimaryKeyRemap, Storage, Transaction, }; use crate::types::index::{IndexMeta, IndexType}; use crate::types::tuple::Tuple; @@ -358,7 +358,7 @@ mod test { let mut iter = IndexIter { offset: 0, limit: None, - remap_pk_indices: Some(vec![0]), + remap_pk_indices: PrimaryKeyRemap::Indices(vec![0]), params: IndexImplParams { deserializers, index_meta: Arc::new(IndexMeta { @@ -426,6 +426,7 @@ mod test { max: Bound::Unbounded, }], true, + None, ) .unwrap(); @@ -451,6 +452,7 @@ mod test { max: Bound::Unbounded, }], true, + None, ) .unwrap(); @@ -462,4 +464,63 @@ mod test { Ok(()) } + + #[test] + fn test_read_by_index_cover() -> Result<(), DatabaseError> { + let temp_dir = TempDir::new().expect("unable to create temporary working directory"); + let kite_sql = DataBaseBuilder::path(temp_dir.path()).build()?; + kite_sql + .run("create table t1 (a int primary key, b int unique)")? + .done()?; + kite_sql + .run("insert into t1 (a, b) values (0, 0), (1, 1), (2, 2), (3, 4)")? + .done()?; + + let mut transaction = kite_sql.storage.transaction().unwrap(); + let table = transaction + .table(kite_sql.state.table_cache(), "t1".to_string().into())? + .unwrap() + .clone(); + let unique_index = table + .indexes + .iter() + .find(|index| matches!(index.ty, IndexType::Unique)) + .unwrap() + .clone(); + let (b_pos, b_column) = table + .columns() + .cloned() + .enumerate() + .find(|(_, column)| column.name() == "b") + .unwrap(); + let mut columns = BTreeMap::new(); + columns.insert(b_pos, b_column.clone()); + let covered_deserializers = vec![b_column.datatype().serializable()]; + + let target_pk = DataValue::Int32(3); + let covered_value = DataValue::Int32(4); + transaction.remove_tuple("t1", &target_pk)?; + + let mut iter = transaction.read_by_index( + kite_sql.state.table_cache(), + "t1".to_string().into(), + (Some(0), Some(1)), + columns, + unique_index, + vec![Range::Eq(covered_value.clone())], + false, + Some(covered_deserializers), + )?; + + let mut tuples = Vec::new(); + while let Some(tuple) = iter.next_tuple()? { + tuples.push(tuple); + } + + assert_eq!(tuples.len(), 1); + assert_eq!(tuples[0].pk, Some(target_pk)); + assert_eq!(tuples[0].values, vec![covered_value]); + + Ok(()) + } } diff --git a/src/storage/table_codec.rs b/src/storage/table_codec.rs index b46275ac..d6baca69 100644 --- a/src/storage/table_codec.rs +++ b/src/storage/table_codec.rs @@ -2,8 +2,8 @@ use crate::catalog::view::View; use crate::catalog::{ColumnRef, ColumnRelation, TableMeta}; use crate::errors::DatabaseError; use crate::serdes::{ReferenceSerialization, ReferenceTables}; -use crate::storage::{TableCache, Transaction}; -use crate::types::index::{Index, IndexId, IndexMeta, IndexType}; +use crate::storage::{PrimaryKeyRemap, TableCache, Transaction}; +use crate::types::index::{Index, IndexId, IndexMeta, IndexType, INDEX_ID_LEN}; use crate::types::serialize::TupleValueSerializableImpl; use crate::types::tuple::{Tuple, TupleId}; use crate::types::value::DataValue; @@ -16,6 +16,8 @@ use std::sync::LazyLock; pub(crate) const BOUND_MIN_TAG: u8 = u8::MIN; pub(crate) const BOUND_MAX_TAG: u8 = u8::MAX; +pub(crate) const NULL_TAG: u8 = 0u8; +pub(crate) const NOTNULL_TAG: u8 = 1u8; static ROOT_BYTES: LazyLock> = LazyLock::new(|| b"Root".to_vec()); static VIEW_BYTES: LazyLock> = LazyLock::new(|| b"View".to_vec()); @@ -285,7 +287,7 @@ impl TableCodec { #[inline] pub fn decode_tuple( deserializers: &[TupleValueSerializableImpl], - pk_indices: Option<&[usize]>, + pk_indices: &PrimaryKeyRemap, bytes: &[u8], values_len: usize, total_len: usize, @@ -383,6 +385,12 @@ impl TableCodec { Ok(key_prefix) } + pub fn decode_index_key(bytes: &[u8], ty: &LogicalType) -> Result { + // Hash + TypeTag + Bound Min + Index Id Len + Bound Min + let start = 8 + 1 + 1 + 1 + INDEX_ID_LEN; + DataValue::memcomparable_decode(&mut Cursor::new(&bytes[start..]), ty) + } + pub fn decode_index(bytes: &[u8]) -> Result { Ok(bincode::deserialize_from(&mut Cursor::new(bytes))?) } @@ -544,7 +552,7 @@ mod tests { use crate::serdes::ReferenceTables; use crate::storage::rocksdb::RocksTransaction; use crate::storage::table_codec::{BumpBytes, TableCodec}; - use crate::storage::Storage; + use crate::storage::{PrimaryKeyRemap, Storage}; use crate::types::index::{Index, IndexMeta, IndexType}; use crate::types::tuple::Tuple; use crate::types::value::DataValue; @@ -599,7 +607,13 @@ mod tests { tuple.pk = None; assert_eq!( - TableCodec::decode_tuple(&deserializers, None, &bytes, deserializers.len(), 2,)?, + TableCodec::decode_tuple( + &deserializers, + &PrimaryKeyRemap::None, + &bytes, + deserializers.len(), + 2, + )?, tuple ); diff --git a/src/types/index.rs b/src/types/index.rs index a1f8e2d8..71b8cbd4 100644 --- a/src/types/index.rs +++ b/src/types/index.rs @@ -2,6 +2,7 @@ use crate::catalog::{TableCatalog, TableName}; use crate::errors::DatabaseError; use crate::expression::range_detacher::Range; use crate::expression::ScalarExpression; +use crate::types::serialize::TupleValueSerializableImpl; use crate::types::value::DataValue; use crate::types::{ColumnId, LogicalType}; use kite_sql_serde_macros::ReferenceSerialization; @@ -12,6 +13,8 @@ use std::sync::Arc; pub type IndexId = u32; pub type IndexMetaRef = Arc; +pub const INDEX_ID_LEN: usize = 4; + #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, ReferenceSerialization)] pub enum IndexType { PrimaryKey { is_multiple: bool }, @@ -24,6 +27,7 @@ pub enum IndexType { pub struct IndexInfo { pub(crate) meta: IndexMetaRef, pub(crate) range: Option, + pub(crate) covered_deserializers: Option>, } #[derive(Debug, Clone, Eq, PartialEq, Hash, ReferenceSerialization)] @@ -78,6 +82,9 @@ impl fmt::Display for IndexInfo { } else { write!(f, "EMPTY")?; } + if self.covered_deserializers.is_some() { + write!(f, " Covered")?; + } Ok(()) } diff --git a/src/types/serialize.rs b/src/types/serialize.rs index 39948d47..7b2367de 100644 --- a/src/types/serialize.rs +++ b/src/types/serialize.rs @@ -3,6 +3,7 @@ use crate::types::value::{DataValue, Utf8Type}; use crate::types::LogicalType; use bumpalo::collections::Vec; use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; +use kite_sql_serde_macros::ReferenceSerialization; use ordered_float::OrderedFloat; use rust_decimal::Decimal; use sqlparser::ast::CharLengthUnits; @@ -41,7 +42,7 @@ pub trait TupleValueSerializable: Debug { } } -#[derive(Debug)] +#[derive(Debug, Clone, Eq, PartialEq, Hash, ReferenceSerialization)] pub enum TupleValueSerializableImpl { Boolean, Int8, diff --git a/src/types/tuple.rs b/src/types/tuple.rs index 54ed5f11..d57ea7ac 100644 --- a/src/types/tuple.rs +++ b/src/types/tuple.rs @@ -2,6 +2,7 @@ use crate::catalog::ColumnRef; use crate::db::ResultIter; use crate::errors::DatabaseError; use crate::storage::table_codec::BumpBytes; +use crate::storage::PrimaryKeyRemap; use crate::types::serialize::{TupleValueSerializable, TupleValueSerializableImpl}; use crate::types::value::DataValue; use bumpalo::Bump; @@ -36,7 +37,7 @@ impl Tuple { #[inline] pub fn deserialize_from( deserializers: &[TupleValueSerializableImpl], - pk_indices: Option<&[usize]>, + pk_remap: &PrimaryKeyRemap, bytes: &[u8], values_len: usize, total_len: usize, @@ -57,11 +58,13 @@ impl Tuple { } deserializer.filling_value(&mut cursor, &mut values)?; } + let pk = if let PrimaryKeyRemap::Indices(indices) = pk_remap { + Some(Tuple::primary_projection(indices, &values)) + } else { + None + }; - Ok(Tuple { - pk: pk_indices.map(|pk_indices| Tuple::primary_projection(pk_indices, &values)), - values, - }) + Ok(Tuple { pk, values }) } /// e.g.: bits(u8)..|data_0(len for utf8_1)|utf8_0|data_1| @@ -137,6 +140,7 @@ pub fn create_table(iter: I) -> Result { #[cfg(all(test, not(target_arch = "wasm32")))] mod tests { use crate::catalog::{ColumnCatalog, ColumnDesc, ColumnRef}; + use crate::storage::PrimaryKeyRemap; use crate::types::tuple::Tuple; use crate::types::value::{DataValue, Utf8Type}; use crate::types::LogicalType; @@ -325,7 +329,7 @@ mod tests { { let tuple_0 = Tuple::deserialize_from( &serializers, - Some(vec![0]).as_deref(), + &PrimaryKeyRemap::Indices(vec![0]), &tuples[0].serialize_to(&serializers, &arena).unwrap(), serializers.len(), columns.len(), @@ -337,7 +341,7 @@ mod tests { { let tuple_1 = Tuple::deserialize_from( &serializers, - Some(vec![0]).as_deref(), + &PrimaryKeyRemap::Indices(vec![0]), &tuples[1].serialize_to(&serializers, &arena).unwrap(), serializers.len(), columns.len(), @@ -356,7 +360,7 @@ mod tests { ]; let tuple_2 = Tuple::deserialize_from( &projection_serializers, - Some(vec![0]).as_deref(), + &PrimaryKeyRemap::Indices(vec![0]), &tuples[0].serialize_to(&serializers, &arena).unwrap(), 2, columns.len(), @@ -381,7 +385,7 @@ mod tests { let tuple_3 = Tuple::deserialize_from( &multiple_pk_serializers, - Some(vec![4, 2]).as_deref(), + &PrimaryKeyRemap::Indices(vec![4, 2]), &tuples[0].serialize_to(&serializers, &arena).unwrap(), serializers.len(), columns.len(), diff --git a/src/types/value.rs b/src/types/value.rs index 9a024b78..607770d7 100644 --- a/src/types/value.rs +++ b/src/types/value.rs @@ -1,6 +1,7 @@ use super::LogicalType; use crate::errors::DatabaseError; -use crate::storage::table_codec::{BumpBytes, BOUND_MAX_TAG, BOUND_MIN_TAG}; +use crate::storage::table_codec::{BumpBytes, BOUND_MAX_TAG, NOTNULL_TAG, NULL_TAG}; +use byteorder::ReadBytesExt; use chrono::format::{DelayedFormat, StrftimeItems}; use chrono::{DateTime, Datelike, NaiveDate, NaiveDateTime, NaiveTime, Timelike, Utc}; use itertools::Itertools; @@ -11,7 +12,7 @@ use sqlparser::ast::CharLengthUnits; use std::cmp::Ordering; use std::fmt::Formatter; use std::hash::Hash; -use std::io::Write; +use std::io::{Read, Write}; use std::str::FromStr; use std::sync::LazyLock; use std::{cmp, fmt, mem}; @@ -206,6 +207,14 @@ macro_rules! encode_u { }; } +macro_rules! decode_u { + ($reader:ident, $ty:ty) => {{ + let mut buf = [0u8; std::mem::size_of::<$ty>()]; + $reader.read_exact(&mut buf)?; + <$ty>::from_be_bytes(buf) + }}; +} + impl Eq for DataValue {} impl Hash for DataValue { @@ -576,43 +585,97 @@ impl DataValue { // // Refer: https://github.com/facebook/mysql-5.6/wiki/MyRocks-record-format#memcomparable-format #[inline] - fn encode_bytes(b: &mut BumpBytes, data: &[u8]) { + // FIXME + fn encode_string(b: &mut BumpBytes, data: &[u8]) { let d_len = data.len(); - let realloc_size = (d_len / ENCODE_GROUP_SIZE + 1) * (ENCODE_GROUP_SIZE + 1); - Self::realloc_bytes(b, realloc_size); + let needed_groups = d_len / ENCODE_GROUP_SIZE + 1; + Self::realloc_bytes(b, needed_groups * (ENCODE_GROUP_SIZE + 1)); let mut idx = 0; - while idx <= d_len { - let remain = d_len - idx; - let pad_count: usize; + + loop { + let remain = d_len.saturating_sub(idx); if remain >= ENCODE_GROUP_SIZE { b.extend_from_slice(&data[idx..idx + ENCODE_GROUP_SIZE]); - pad_count = 0; - } else { - pad_count = ENCODE_GROUP_SIZE - remain; + b.push(ENCODE_MARKER); + idx += ENCODE_GROUP_SIZE; + continue; + } + + let pad_count = ENCODE_GROUP_SIZE - remain; + + if remain > 0 { b.extend_from_slice(&data[idx..]); - b.extend_from_slice(&vec![0; pad_count]); + } + + for _ in 0..pad_count { + b.push(0); } b.push(ENCODE_MARKER - pad_count as u8); - idx += ENCODE_GROUP_SIZE; + break; } } #[inline] - fn realloc_bytes(b: &mut BumpBytes, size: usize) { - let len = b.len(); + fn decode_string(reader: &mut R) -> std::io::Result> { + let mut result = Vec::new(); + + loop { + let mut group = [0u8; ENCODE_GROUP_SIZE]; + reader.read_exact(&mut group)?; + + let mut marker = [0u8; 1]; + reader.read_exact(&mut marker)?; + let marker = marker[0]; - if size > len { - b.reserve(size - len); - b.resize(size, 0); + let pad_count = (ENCODE_MARKER - marker) as usize; + + if pad_count > ENCODE_GROUP_SIZE { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidData, + "memcomparable string marker out of range", + )); + } + + let data_len = ENCODE_GROUP_SIZE - pad_count; + result.extend_from_slice(&group[..data_len]); + + if pad_count != 0 { + break; + } } + + Ok(result) } #[inline] - pub fn memcomparable_encode(&self, b: &mut BumpBytes) -> Result<(), DatabaseError> { + fn realloc_bytes(b: &mut BumpBytes, size: usize) { + if size > 0 { + b.reserve(size); + } + } + + #[inline(always)] + pub fn memcomparable_encode_with_null_order( + &self, + b: &mut BumpBytes, + nulls_first: bool, + ) -> Result<(), DatabaseError> { + let (null_tag, not_null_tag) = if nulls_first { + (NULL_TAG, NOTNULL_TAG) + } else { + (NOTNULL_TAG, NULL_TAG) + }; + if let DataValue::Null = self { + b.push(null_tag); + return Ok(()); + } + b.push(not_null_tag); + match self { + DataValue::Null => (), DataValue::Int8(v) => encode_u!(b, *v as u8 ^ 0x80_u8), DataValue::Int16(v) => encode_u!(b, *v as u16 ^ 0x8000_u16), DataValue::Int32(v) | DataValue::Date32(v) => { @@ -625,7 +688,7 @@ impl DataValue { DataValue::UInt16(v) => encode_u!(b, v), DataValue::UInt32(v) | DataValue::Time32(v, ..) => encode_u!(b, v), DataValue::UInt64(v) => encode_u!(b, v), - DataValue::Utf8 { value: v, .. } => Self::encode_bytes(b, v.as_bytes()), + DataValue::Utf8 { value: v, .. } => Self::encode_string(b, v.as_bytes()), DataValue::Boolean(v) => b.push(if *v { b'1' } else { b'0' }), DataValue::Float32(f) => { let mut u = f.to_bits(); @@ -649,17 +712,14 @@ impl DataValue { encode_u!(b, u); } - DataValue::Null => (), DataValue::Decimal(v) => Self::serialize_decimal(*v, b)?, DataValue::Tuple(values, is_upper) => { let last = values.len() - 1; for (i, v) in values.iter().enumerate() { v.memcomparable_encode(b)?; - if (v.is_null() || i == last) && *is_upper { + if i == last && *is_upper { b.push(BOUND_MAX_TAG); - } else { - b.push(BOUND_MIN_TAG); } } } @@ -668,6 +728,90 @@ impl DataValue { Ok(()) } + #[inline] + pub fn memcomparable_encode(&self, b: &mut BumpBytes) -> Result<(), DatabaseError> { + self.memcomparable_encode_with_null_order(b, true) + } + + pub fn memcomparable_decode( + reader: &mut R, + ty: &LogicalType, + ) -> Result { + if reader.read_u8()? == 0u8 { + return Ok(DataValue::Null); + } + match ty { + LogicalType::SqlNull => Ok(DataValue::Null), + LogicalType::Tinyint => { + let u = decode_u!(reader, u8); + Ok(DataValue::Int8((u ^ 0x80) as i8)) + } + LogicalType::Smallint => { + let u = decode_u!(reader, u16); + Ok(DataValue::Int16((u ^ 0x8000) as i16)) + } + LogicalType::Integer | LogicalType::Date | LogicalType::Time(_) => { + let u = decode_u!(reader, u32); + Ok(DataValue::Int32((u ^ 0x8000_0000) as i32)) + } + LogicalType::Bigint | LogicalType::DateTime | LogicalType::TimeStamp(..) => { + let u = decode_u!(reader, u64); + Ok(DataValue::Int64((u ^ 0x8000_0000_0000_0000) as i64)) + } + LogicalType::UTinyint => Ok(DataValue::UInt8(decode_u!(reader, u8))), + LogicalType::USmallint => Ok(DataValue::UInt16(decode_u!(reader, u16))), + LogicalType::UInteger => Ok(DataValue::UInt32(decode_u!(reader, u32))), + LogicalType::UBigint => Ok(DataValue::UInt64(decode_u!(reader, u64))), + LogicalType::Float => { + let mut u = decode_u!(reader, u32); + + // 反向还原 + if (u & 0x8000_0000) != 0 { + u &= !0x8000_0000; + } else { + u = !u; + } + + Ok(DataValue::Float32(f32::from_bits(u).into())) + } + LogicalType::Double => { + let mut u = decode_u!(reader, u64); + + if (u & 0x8000_0000_0000_0000) != 0 { + u &= !0x8000_0000_0000_0000; + } else { + u = !u; + } + + Ok(DataValue::Float64(f64::from_bits(u).into())) + } + LogicalType::Boolean => { + let mut b = [0u8; 1]; + reader.read_exact(&mut b)?; + Ok(DataValue::Boolean(b[0] == b'1')) + } + LogicalType::Varchar(len, unit) => Ok(DataValue::Utf8 { + value: String::from_utf8(Self::decode_string(reader)?)?, + ty: Utf8Type::Variable(*len), + unit: *unit, + }), + LogicalType::Char(len, unit) => Ok(DataValue::Utf8 { + value: String::from_utf8(Self::decode_string(reader)?)?, + ty: Utf8Type::Fixed(*len), + unit: *unit, + }), + LogicalType::Decimal(..) => Ok(DataValue::Decimal(Self::deserialize_decimal(reader)?)), + LogicalType::Tuple(tys) => { + let mut values = Vec::with_capacity(tys.len()); + + for ty in tys { + values.push(Self::memcomparable_decode(reader, ty)?); + } + Ok(DataValue::Tuple(values, false)) + } + } + } + // https://github.com/risingwavelabs/memcomparable/blob/main/src/ser.rs#L468 pub fn serialize_decimal(decimal: Decimal, bytes: &mut BumpBytes) -> Result<(), DatabaseError> { if decimal.is_zero() { @@ -787,6 +931,64 @@ impl DataValue { (e100 as i8, byte_array) } + pub fn deserialize_decimal(mut reader: R) -> Result { + // decode exponent + let flag = reader.read_u8()?; + let exponent = match flag { + 0x08 => !reader.read_u8()? as i8, + 0x09..=0x13 => (0x13 - flag) as i8, + 0x14 => -(reader.read_u8()? as i8), + 0x15 => return Ok(Decimal::ZERO), + 0x16 => -!(reader.read_u8()? as i8), + 0x17..=0x21 => (flag - 0x17) as i8, + 0x22 => reader.read_u8()? as i8, + b => { + return Err(DatabaseError::InvalidValue(format!( + "invalid decimal exponent: {b}" + ))) + } + }; + // decode mantissa + let neg = (0x07..0x15).contains(&flag); + let mut mantissa: i128 = 0; + let mut mlen = 0i8; + loop { + let mut b = reader.read_u8()?; + if neg { + b = !b; + } + let x = b / 2; + mantissa = mantissa * 100 + x as i128; + mlen += 1; + if b & 1 == 0 { + break; + } + } + + // get scale + let mut scale = (mlen - exponent) * 2; + if scale <= 0 { + // e.g. 1(mantissa) + 2(exponent) (which is 100). + for _i in 0..-scale { + mantissa *= 10; + } + scale = 0; + } else if mantissa % 10 == 0 { + // Remove unnecessary zeros. + // e.g. 0.01_11_10 should be 0.01_11_1 + mantissa /= 10; + scale -= 1; + } + + if neg { + mantissa = -mantissa; + } + Ok(rust_decimal::Decimal::from_i128_with_scale( + mantissa, + scale as u32, + )) + } + #[inline] pub fn is_true(&self) -> Result { if self.is_null() { @@ -1939,10 +2141,13 @@ impl fmt::Debug for DataValue { mod test { use crate::errors::DatabaseError; use crate::storage::table_codec::BumpBytes; - use crate::types::value::DataValue; + use crate::types::value::{DataValue, Utf8Type}; + use crate::types::LogicalType; use bumpalo::Bump; use ordered_float::OrderedFloat; use rust_decimal::Decimal; + use sqlparser::ast::CharLengthUnits; + use std::io::Cursor; #[test] fn test_mem_comparable_null() -> Result<(), DatabaseError> { @@ -1952,10 +2157,15 @@ mod test { let mut key_i8_2 = BumpBytes::new_in(&arena); let mut key_i8_3 = BumpBytes::new_in(&arena); - DataValue::Null.memcomparable_encode(&mut key_i8_0)?; - DataValue::Int8(i8::MIN).memcomparable_encode(&mut key_i8_1)?; - DataValue::Int8(-1_i8).memcomparable_encode(&mut key_i8_2)?; - DataValue::Int8(i8::MAX).memcomparable_encode(&mut key_i8_3)?; + let value_0 = DataValue::Null; + let value_1 = DataValue::Int8(i8::MIN); + let value_2 = DataValue::Int8(-1_i8); + let value_3 = DataValue::Int8(i8::MAX); + + value_0.memcomparable_encode(&mut key_i8_0)?; + value_1.memcomparable_encode(&mut key_i8_1)?; + value_2.memcomparable_encode(&mut key_i8_2)?; + value_3.memcomparable_encode(&mut key_i8_3)?; println!("{:?} < {:?}", key_i8_0, key_i8_1); println!("{:?} < {:?}", key_i8_1, key_i8_2); @@ -1964,114 +2174,389 @@ mod test { assert!(key_i8_1 < key_i8_2); assert!(key_i8_2 < key_i8_3); + assert_eq!( + value_0, + DataValue::memcomparable_decode( + &mut Cursor::new(key_i8_0.as_slice()), + &LogicalType::Tinyint + )? + ); + assert_eq!( + value_1, + DataValue::memcomparable_decode( + &mut Cursor::new(key_i8_1.as_slice()), + &LogicalType::Tinyint + )? + ); + assert_eq!( + value_2, + DataValue::memcomparable_decode( + &mut Cursor::new(key_i8_2.as_slice()), + &LogicalType::Tinyint + )? + ); + assert_eq!( + value_3, + DataValue::memcomparable_decode( + &mut Cursor::new(key_i8_3.as_slice()), + &LogicalType::Tinyint + )? + ); + Ok(()) } #[test] fn test_mem_comparable_int() -> Result<(), DatabaseError> { let arena = Bump::new(); + + // ---------- Int8 ---------- + let mut key_i8_0 = BumpBytes::new_in(&arena); let mut key_i8_1 = BumpBytes::new_in(&arena); let mut key_i8_2 = BumpBytes::new_in(&arena); let mut key_i8_3 = BumpBytes::new_in(&arena); - DataValue::Int8(i8::MIN).memcomparable_encode(&mut key_i8_1)?; - DataValue::Int8(-1_i8).memcomparable_encode(&mut key_i8_2)?; - DataValue::Int8(i8::MAX).memcomparable_encode(&mut key_i8_3)?; + let v_i8_0 = DataValue::Null; + let v_i8_1 = DataValue::Int8(i8::MIN); + let v_i8_2 = DataValue::Int8(-1); + let v_i8_3 = DataValue::Int8(i8::MAX); - println!("{:?} < {:?}", key_i8_1, key_i8_2); - println!("{:?} < {:?}", key_i8_2, key_i8_3); + v_i8_0.memcomparable_encode(&mut key_i8_0)?; + v_i8_1.memcomparable_encode(&mut key_i8_1)?; + v_i8_2.memcomparable_encode(&mut key_i8_2)?; + v_i8_3.memcomparable_encode(&mut key_i8_3)?; + + assert!(key_i8_0 < key_i8_1); assert!(key_i8_1 < key_i8_2); assert!(key_i8_2 < key_i8_3); + assert_eq!( + v_i8_0, + DataValue::memcomparable_decode( + &mut Cursor::new(key_i8_0.as_slice()), + &LogicalType::Tinyint + )? + ); + assert_eq!( + v_i8_1, + DataValue::memcomparable_decode( + &mut Cursor::new(key_i8_1.as_slice()), + &LogicalType::Tinyint + )? + ); + assert_eq!( + v_i8_2, + DataValue::memcomparable_decode( + &mut Cursor::new(key_i8_2.as_slice()), + &LogicalType::Tinyint + )? + ); + assert_eq!( + v_i8_3, + DataValue::memcomparable_decode( + &mut Cursor::new(key_i8_3.as_slice()), + &LogicalType::Tinyint + )? + ); + + // ---------- Int16 ---------- + let mut key_i16_0 = BumpBytes::new_in(&arena); let mut key_i16_1 = BumpBytes::new_in(&arena); let mut key_i16_2 = BumpBytes::new_in(&arena); let mut key_i16_3 = BumpBytes::new_in(&arena); - DataValue::Int16(i16::MIN).memcomparable_encode(&mut key_i16_1)?; - DataValue::Int16(-1_i16).memcomparable_encode(&mut key_i16_2)?; - DataValue::Int16(i16::MAX).memcomparable_encode(&mut key_i16_3)?; + let v_i16_0 = DataValue::Null; + let v_i16_1 = DataValue::Int16(i16::MIN); + let v_i16_2 = DataValue::Int16(-1); + let v_i16_3 = DataValue::Int16(i16::MAX); + + v_i16_0.memcomparable_encode(&mut key_i16_0)?; + v_i16_1.memcomparable_encode(&mut key_i16_1)?; + v_i16_2.memcomparable_encode(&mut key_i16_2)?; + v_i16_3.memcomparable_encode(&mut key_i16_3)?; - println!("{:?} < {:?}", key_i16_1, key_i16_2); - println!("{:?} < {:?}", key_i16_2, key_i16_3); + assert!(key_i16_0 < key_i16_1); assert!(key_i16_1 < key_i16_2); assert!(key_i16_2 < key_i16_3); + assert_eq!( + v_i16_0, + DataValue::memcomparable_decode( + &mut Cursor::new(key_i16_0.as_slice()), + &LogicalType::Smallint + )? + ); + assert_eq!( + v_i16_1, + DataValue::memcomparable_decode( + &mut Cursor::new(key_i16_1.as_slice()), + &LogicalType::Smallint + )? + ); + assert_eq!( + v_i16_2, + DataValue::memcomparable_decode( + &mut Cursor::new(key_i16_2.as_slice()), + &LogicalType::Smallint + )? + ); + assert_eq!( + v_i16_3, + DataValue::memcomparable_decode( + &mut Cursor::new(key_i16_3.as_slice()), + &LogicalType::Smallint + )? + ); + + // ---------- Int32 ---------- + let mut key_i32_0 = BumpBytes::new_in(&arena); let mut key_i32_1 = BumpBytes::new_in(&arena); let mut key_i32_2 = BumpBytes::new_in(&arena); let mut key_i32_3 = BumpBytes::new_in(&arena); - DataValue::Int32(i32::MIN).memcomparable_encode(&mut key_i32_1)?; - DataValue::Int32(-1_i32).memcomparable_encode(&mut key_i32_2)?; - DataValue::Int32(i32::MAX).memcomparable_encode(&mut key_i32_3)?; + let v_i32_0 = DataValue::Null; + let v_i32_1 = DataValue::Int32(i32::MIN); + let v_i32_2 = DataValue::Int32(-1); + let v_i32_3 = DataValue::Int32(i32::MAX); + + v_i32_0.memcomparable_encode(&mut key_i32_0)?; + v_i32_1.memcomparable_encode(&mut key_i32_1)?; + v_i32_2.memcomparable_encode(&mut key_i32_2)?; + v_i32_3.memcomparable_encode(&mut key_i32_3)?; - println!("{:?} < {:?}", key_i32_1, key_i32_2); - println!("{:?} < {:?}", key_i32_2, key_i32_3); + assert!(key_i32_0 < key_i32_1); assert!(key_i32_1 < key_i32_2); assert!(key_i32_2 < key_i32_3); + assert_eq!( + v_i32_0, + DataValue::memcomparable_decode( + &mut Cursor::new(key_i32_0.as_slice()), + &LogicalType::Integer + )? + ); + assert_eq!( + v_i32_1, + DataValue::memcomparable_decode( + &mut Cursor::new(key_i32_1.as_slice()), + &LogicalType::Integer + )? + ); + assert_eq!( + v_i32_2, + DataValue::memcomparable_decode( + &mut Cursor::new(key_i32_2.as_slice()), + &LogicalType::Integer + )? + ); + assert_eq!( + v_i32_3, + DataValue::memcomparable_decode( + &mut Cursor::new(key_i32_3.as_slice()), + &LogicalType::Integer + )? + ); + + // ---------- Int64 ---------- + let mut key_i64_0 = BumpBytes::new_in(&arena); let mut key_i64_1 = BumpBytes::new_in(&arena); let mut key_i64_2 = BumpBytes::new_in(&arena); let mut key_i64_3 = BumpBytes::new_in(&arena); - DataValue::Int64(i64::MIN).memcomparable_encode(&mut key_i64_1)?; - DataValue::Int64(-1_i64).memcomparable_encode(&mut key_i64_2)?; - DataValue::Int64(i64::MAX).memcomparable_encode(&mut key_i64_3)?; + let v_i64_0 = DataValue::Null; + let v_i64_1 = DataValue::Int64(i64::MIN); + let v_i64_2 = DataValue::Int64(-1); + let v_i64_3 = DataValue::Int64(i64::MAX); - println!("{:?} < {:?}", key_i64_1, key_i64_2); - println!("{:?} < {:?}", key_i64_2, key_i64_3); + v_i64_0.memcomparable_encode(&mut key_i64_0)?; + v_i64_1.memcomparable_encode(&mut key_i64_1)?; + v_i64_2.memcomparable_encode(&mut key_i64_2)?; + v_i64_3.memcomparable_encode(&mut key_i64_3)?; + + assert!(key_i64_0 < key_i64_1); assert!(key_i64_1 < key_i64_2); assert!(key_i64_2 < key_i64_3); + assert_eq!( + v_i64_0, + DataValue::memcomparable_decode( + &mut Cursor::new(key_i64_0.as_slice()), + &LogicalType::Bigint + )? + ); + assert_eq!( + v_i64_1, + DataValue::memcomparable_decode( + &mut Cursor::new(key_i64_1.as_slice()), + &LogicalType::Bigint + )? + ); + assert_eq!( + v_i64_2, + DataValue::memcomparable_decode( + &mut Cursor::new(key_i64_2.as_slice()), + &LogicalType::Bigint + )? + ); + assert_eq!( + v_i64_3, + DataValue::memcomparable_decode( + &mut Cursor::new(key_i64_3.as_slice()), + &LogicalType::Bigint + )? + ); + Ok(()) } #[test] fn test_mem_comparable_float() -> Result<(), DatabaseError> { let arena = Bump::new(); + + // ---------- Float32 ---------- + let mut key_f32_0 = BumpBytes::new_in(&arena); let mut key_f32_1 = BumpBytes::new_in(&arena); let mut key_f32_2 = BumpBytes::new_in(&arena); let mut key_f32_3 = BumpBytes::new_in(&arena); - DataValue::Float32(OrderedFloat(f32::MIN)).memcomparable_encode(&mut key_f32_1)?; - DataValue::Float32(OrderedFloat(-1_f32)).memcomparable_encode(&mut key_f32_2)?; - DataValue::Float32(OrderedFloat(f32::MAX)).memcomparable_encode(&mut key_f32_3)?; + let v_f32_0 = DataValue::Null; + let v_f32_1 = DataValue::Float32(OrderedFloat(f32::MIN)); + let v_f32_2 = DataValue::Float32(OrderedFloat(-1.0)); + let v_f32_3 = DataValue::Float32(OrderedFloat(f32::MAX)); + + v_f32_0.memcomparable_encode(&mut key_f32_0)?; + v_f32_1.memcomparable_encode(&mut key_f32_1)?; + v_f32_2.memcomparable_encode(&mut key_f32_2)?; + v_f32_3.memcomparable_encode(&mut key_f32_3)?; - println!("{:?} < {:?}", key_f32_1, key_f32_2); - println!("{:?} < {:?}", key_f32_2, key_f32_3); + assert!(key_f32_0 < key_f32_1); assert!(key_f32_1 < key_f32_2); assert!(key_f32_2 < key_f32_3); + assert_eq!( + v_f32_0, + DataValue::memcomparable_decode(&mut Cursor::new(&key_f32_0[..]), &LogicalType::Float)? + ); + assert_eq!( + v_f32_1, + DataValue::memcomparable_decode(&mut Cursor::new(&key_f32_1[..]), &LogicalType::Float)? + ); + assert_eq!( + v_f32_2, + DataValue::memcomparable_decode(&mut Cursor::new(&key_f32_2[..]), &LogicalType::Float)? + ); + assert_eq!( + v_f32_3, + DataValue::memcomparable_decode(&mut Cursor::new(&key_f32_3[..]), &LogicalType::Float)? + ); + + // ---------- Float64 ---------- + let mut key_f64_0 = BumpBytes::new_in(&arena); let mut key_f64_1 = BumpBytes::new_in(&arena); let mut key_f64_2 = BumpBytes::new_in(&arena); let mut key_f64_3 = BumpBytes::new_in(&arena); - DataValue::Float64(OrderedFloat(f64::MIN)).memcomparable_encode(&mut key_f64_1)?; - DataValue::Float64(OrderedFloat(-1_f64)).memcomparable_encode(&mut key_f64_2)?; - DataValue::Float64(OrderedFloat(f64::MAX)).memcomparable_encode(&mut key_f64_3)?; + let v_f64_0 = DataValue::Null; + let v_f64_1 = DataValue::Float64(OrderedFloat(f64::MIN)); + let v_f64_2 = DataValue::Float64(OrderedFloat(-1.0)); + let v_f64_3 = DataValue::Float64(OrderedFloat(f64::MAX)); - println!("{:?} < {:?}", key_f64_1, key_f64_2); - println!("{:?} < {:?}", key_f64_2, key_f64_3); + v_f64_0.memcomparable_encode(&mut key_f64_0)?; + v_f64_1.memcomparable_encode(&mut key_f64_1)?; + v_f64_2.memcomparable_encode(&mut key_f64_2)?; + v_f64_3.memcomparable_encode(&mut key_f64_3)?; + + assert!(key_f64_0 < key_f64_1); assert!(key_f64_1 < key_f64_2); assert!(key_f64_2 < key_f64_3); + assert_eq!( + v_f64_0, + DataValue::memcomparable_decode( + &mut Cursor::new(&key_f64_0[..]), + &LogicalType::Double + )? + ); + assert_eq!( + v_f64_1, + DataValue::memcomparable_decode( + &mut Cursor::new(&key_f64_1[..]), + &LogicalType::Double + )? + ); + assert_eq!( + v_f64_2, + DataValue::memcomparable_decode( + &mut Cursor::new(&key_f64_2[..]), + &LogicalType::Double + )? + ); + assert_eq!( + v_f64_3, + DataValue::memcomparable_decode( + &mut Cursor::new(&key_f64_3[..]), + &LogicalType::Double + )? + ); + Ok(()) } #[test] fn test_mem_comparable_decimal() -> Result<(), DatabaseError> { let arena = Bump::new(); - let mut key_deciaml_1 = BumpBytes::new_in(&arena); - let mut key_deciaml_2 = BumpBytes::new_in(&arena); - let mut key_deciaml_3 = BumpBytes::new_in(&arena); - DataValue::Decimal(Decimal::MIN).memcomparable_encode(&mut key_deciaml_1)?; - DataValue::Decimal(Decimal::new(-1, 0)).memcomparable_encode(&mut key_deciaml_2)?; - DataValue::Decimal(Decimal::MAX).memcomparable_encode(&mut key_deciaml_3)?; - - println!("{:?} < {:?}", key_deciaml_1, key_deciaml_2); - println!("{:?} < {:?}", key_deciaml_2, key_deciaml_3); - assert!(key_deciaml_1 < key_deciaml_2); - assert!(key_deciaml_2 < key_deciaml_3); + let mut key_decimal_0 = BumpBytes::new_in(&arena); + let mut key_decimal_1 = BumpBytes::new_in(&arena); + let mut key_decimal_2 = BumpBytes::new_in(&arena); + let mut key_decimal_3 = BumpBytes::new_in(&arena); + + let v_decimal_0 = DataValue::Null; + let v_decimal_1 = DataValue::Decimal(Decimal::MIN); + let v_decimal_2 = DataValue::Decimal(Decimal::new(-1, 0)); + let v_decimal_3 = DataValue::Decimal(Decimal::MAX); + + v_decimal_0.memcomparable_encode(&mut key_decimal_0)?; + v_decimal_1.memcomparable_encode(&mut key_decimal_1)?; + v_decimal_2.memcomparable_encode(&mut key_decimal_2)?; + v_decimal_3.memcomparable_encode(&mut key_decimal_3)?; + + println!("{:?} < {:?}", key_decimal_0, key_decimal_1); + println!("{:?} < {:?}", key_decimal_1, key_decimal_2); + println!("{:?} < {:?}", key_decimal_2, key_decimal_3); + + assert!(key_decimal_0 < key_decimal_1); + assert!(key_decimal_1 < key_decimal_2); + assert!(key_decimal_2 < key_decimal_3); + + assert_eq!( + v_decimal_0, + DataValue::memcomparable_decode( + &mut Cursor::new(&key_decimal_0[..]), + &LogicalType::Decimal(None, None) + )? + ); + assert_eq!( + v_decimal_1, + DataValue::memcomparable_decode( + &mut Cursor::new(&key_decimal_1[..]), + &LogicalType::Decimal(None, None) + )? + ); + assert_eq!( + v_decimal_2, + DataValue::memcomparable_decode( + &mut Cursor::new(&key_decimal_2[..]), + &LogicalType::Decimal(None, None) + )? + ); + assert_eq!( + v_decimal_3, + DataValue::memcomparable_decode( + &mut Cursor::new(&key_decimal_3[..]), + &LogicalType::Decimal(None, None) + )? + ); Ok(()) } @@ -2079,61 +2564,205 @@ mod test { #[test] fn test_mem_comparable_tuple_lower() -> Result<(), DatabaseError> { let arena = Bump::new(); + let mut key_tuple_1 = BumpBytes::new_in(&arena); let mut key_tuple_2 = BumpBytes::new_in(&arena); let mut key_tuple_3 = BumpBytes::new_in(&arena); - DataValue::Tuple( + let v_tuple_1 = DataValue::Tuple( vec![DataValue::Null, DataValue::Int8(0), DataValue::Int8(1)], false, - ) - .memcomparable_encode(&mut key_tuple_1)?; - DataValue::Tuple( + ); + + let v_tuple_2 = DataValue::Tuple( vec![DataValue::Int8(0), DataValue::Int8(0), DataValue::Int8(1)], false, - ) - .memcomparable_encode(&mut key_tuple_2)?; - DataValue::Tuple( + ); + + let v_tuple_3 = DataValue::Tuple( vec![DataValue::Int8(0), DataValue::Int8(0), DataValue::Int8(2)], false, - ) - .memcomparable_encode(&mut key_tuple_3)?; + ); + + v_tuple_1.memcomparable_encode(&mut key_tuple_1)?; + v_tuple_2.memcomparable_encode(&mut key_tuple_2)?; + v_tuple_3.memcomparable_encode(&mut key_tuple_3)?; println!("{:?} < {:?}", key_tuple_1, key_tuple_2); println!("{:?} < {:?}", key_tuple_2, key_tuple_3); + assert!(key_tuple_1 < key_tuple_2); assert!(key_tuple_2 < key_tuple_3); + assert_eq!( + v_tuple_1, + DataValue::memcomparable_decode( + &mut Cursor::new(&key_tuple_1[..]), + &LogicalType::Tuple(vec![ + LogicalType::Tinyint, + LogicalType::Tinyint, + LogicalType::Tinyint, + ]) + )? + ); + assert_eq!( + v_tuple_2, + DataValue::memcomparable_decode( + &mut Cursor::new(&key_tuple_2[..]), + &LogicalType::Tuple(vec![ + LogicalType::Tinyint, + LogicalType::Tinyint, + LogicalType::Tinyint, + ]) + )? + ); + assert_eq!( + v_tuple_3, + DataValue::memcomparable_decode( + &mut Cursor::new(&key_tuple_3[..]), + &LogicalType::Tuple(vec![ + LogicalType::Tinyint, + LogicalType::Tinyint, + LogicalType::Tinyint, + ]) + )? + ); + Ok(()) } #[test] fn test_mem_comparable_tuple_upper() -> Result<(), DatabaseError> { + use DataValue::*; + + fn logical_eq(lhs: &DataValue, rhs: &DataValue) -> bool { + match (lhs, rhs) { + (Tuple(lv, _), Tuple(rv, _)) => { + lv.len() == rv.len() && lv.iter().zip(rv.iter()).all(|(l, r)| logical_eq(l, r)) + } + _ => lhs == rhs, + } + } + let arena = Bump::new(); + let mut key_tuple_1 = BumpBytes::new_in(&arena); let mut key_tuple_2 = BumpBytes::new_in(&arena); let mut key_tuple_3 = BumpBytes::new_in(&arena); - DataValue::Tuple( - vec![DataValue::Null, DataValue::Int8(0), DataValue::Int8(1)], - true, - ) - .memcomparable_encode(&mut key_tuple_1)?; - DataValue::Tuple( - vec![DataValue::Int8(0), DataValue::Int8(0), DataValue::Int8(1)], - true, - ) - .memcomparable_encode(&mut key_tuple_2)?; - DataValue::Tuple( - vec![DataValue::Int8(0), DataValue::Int8(0), DataValue::Int8(2)], - true, - ) - .memcomparable_encode(&mut key_tuple_3)?; + let v_tuple_1 = Tuple( + vec![Null, Int8(0), Int8(1)], + true, // upper bound + ); - println!("{:?} < {:?}", key_tuple_2, key_tuple_3); - println!("{:?} < {:?}", key_tuple_3, key_tuple_1); + let v_tuple_2 = Tuple(vec![Int8(0), Int8(0), Int8(1)], true); + + let v_tuple_3 = Tuple(vec![Int8(0), Int8(0), Int8(2)], true); + + v_tuple_1.memcomparable_encode(&mut key_tuple_1)?; + v_tuple_2.memcomparable_encode(&mut key_tuple_2)?; + v_tuple_3.memcomparable_encode(&mut key_tuple_3)?; + + assert!(key_tuple_1 < key_tuple_2); assert!(key_tuple_2 < key_tuple_3); - assert!(key_tuple_3 < key_tuple_1); + + let ty = LogicalType::Tuple(vec![ + LogicalType::Tinyint, + LogicalType::Tinyint, + LogicalType::Tinyint, + ]); + + let d1 = DataValue::memcomparable_decode(&mut Cursor::new(&key_tuple_1[..]), &ty)?; + let d2 = DataValue::memcomparable_decode(&mut Cursor::new(&key_tuple_2[..]), &ty)?; + let d3 = DataValue::memcomparable_decode(&mut Cursor::new(&key_tuple_3[..]), &ty)?; + + assert!(logical_eq(&v_tuple_1, &d1)); + assert!(logical_eq(&v_tuple_2, &d2)); + assert!(logical_eq(&v_tuple_3, &d3)); + + Ok(()) + } + + #[test] + fn test_mem_comparable_utf8() -> Result<(), DatabaseError> { + let arena = Bump::new(); + + let mut key_null = BumpBytes::new_in(&arena); + let mut key_a = BumpBytes::new_in(&arena); + let mut key_ab = BumpBytes::new_in(&arena); + let mut key_b = BumpBytes::new_in(&arena); + let mut key_zh = BumpBytes::new_in(&arena); + + let v_null = DataValue::Null; + + let v_a = DataValue::Utf8 { + value: "a".to_string(), + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, + }; + + let v_ab = DataValue::Utf8 { + value: "ab".to_string(), + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, + }; + + let v_b = DataValue::Utf8 { + value: "b".to_string(), + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, + }; + + let v_zh = DataValue::Utf8 { + value: "中".to_string(), + ty: Utf8Type::Variable(None), + unit: CharLengthUnits::Characters, + }; + + v_null.memcomparable_encode(&mut key_null)?; + v_a.memcomparable_encode(&mut key_a)?; + v_ab.memcomparable_encode(&mut key_ab)?; + v_b.memcomparable_encode(&mut key_b)?; + v_zh.memcomparable_encode(&mut key_zh)?; + + // ordering + assert!(key_null < key_a); + assert!(key_a < key_ab); + assert!(key_ab < key_b); + assert!(key_b < key_zh); + + // decode check + assert_eq!( + v_a, + DataValue::memcomparable_decode( + &mut Cursor::new(&key_a[..]), + &LogicalType::Varchar(None, CharLengthUnits::Characters) + )? + ); + + assert_eq!( + v_ab, + DataValue::memcomparable_decode( + &mut Cursor::new(&key_ab[..]), + &LogicalType::Varchar(None, CharLengthUnits::Characters) + )? + ); + + assert_eq!( + v_b, + DataValue::memcomparable_decode( + &mut Cursor::new(&key_b[..]), + &LogicalType::Varchar(None, CharLengthUnits::Characters) + )? + ); + + assert_eq!( + v_zh, + DataValue::memcomparable_decode( + &mut Cursor::new(&key_zh[..]), + &LogicalType::Varchar(None, CharLengthUnits::Characters) + )? + ); Ok(()) } diff --git a/tests/slt/where_by_index.slt b/tests/slt/where_by_index.slt index c7ca1880..4b95c750 100644 --- a/tests/slt/where_by_index.slt +++ b/tests/slt/where_by_index.slt @@ -198,5 +198,70 @@ select * from t1 where c2 > 0 and c2 < 10; 0 1 9 6 7 8 +# unique covered +query I rowsort +select c1 from t1 where c1 < 10; +---- +1 +7 + +# unique covered with primary key projection +query II rowsort +select c1, id from t1 where c1 < 10; +---- +1 0 +7 6 + +statement ok +drop index t1.u_c1_index; + +# normal covered +query II rowsort +select c2 from t1 where c2 < 10 and c2 > 0; +---- +8 +9 + +statement ok +drop index t1.c2_index; + +# composite covered +query II rowsort +select c1, c2 from t1 where c1 < 10 and c1 > 0 and c2 >0 and c2 < 10; +---- +1 9 +7 8 + +# composite covered projection reorder +query II rowsort +select c2, c1 from t1 where c1 < 10 and c1 > 0 and c2 > 0 and c2 < 10; +---- +8 7 +9 1 + + +statement ok +drop table t1; + +statement ok +create table t_cover(id int primary key, c1 int, c2 int, c3 int); + +statement ok +insert into t_cover values + (1, 1, 10, 11), + (2, 2, 20, 21), + (3, 2, 22, 23), + (4, 3, 30, 31); + +statement ok +create index idx_cover on t_cover (c1, c2, c3); + +# composite index trailing columns cover (index columns > output columns) +query II rowsort +select c2, c3 from t_cover where c1 = 2; +---- +20 21 +22 23 + statement ok -drop table t1; \ No newline at end of file +drop table t_cover;