Skip to content

Commit 55ecd3b

Browse files
authored
feat: bootstrap storage world (#5)
Signed-off-by: Alex Chi <[email protected]>
1 parent c719fbf commit 55ecd3b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+3165
-0
lines changed

code/03-00/Cargo.toml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
[package]
2+
name = "risinglight-03-00"
3+
version = "0.1.0"
4+
edition = "2021"
5+
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
6+
7+
[dependencies]
8+
bitvec = "1.0"
9+
enum_dispatch = "0.3"
10+
env_logger = "0.9"
11+
futures = { version = "0.3", default-features = false, features = ["alloc"] }
12+
futures-async-stream = { git = "https://github.com/taiki-e/futures-async-stream", rev = "944f407" }
13+
itertools = "0.10"
14+
log = "0.4"
15+
prettytable-rs = { version = "0.8", default-features = false }
16+
rustyline = "9"
17+
sqlparser = "0.13"
18+
thiserror = "1"
19+
tokio = { version = "1", features = ["rt", "rt-multi-thread", "sync", "macros"] }
20+
tokio-stream = "0.1"
21+
22+
[dev-dependencies]
23+
sqllogictest = "0.2"
24+
test-case = "1.2"

code/03-00/src/array/data_chunk.rs

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
use std::fmt;
2+
use std::sync::Arc;
3+
4+
use itertools::Itertools;
5+
6+
use super::*;
7+
8+
/// A collection of arrays.
9+
///
10+
/// A chunk is a horizontal subset of a query result.
11+
#[derive(PartialEq, Clone)]
12+
pub struct DataChunk {
13+
arrays: Arc<[ArrayImpl]>,
14+
}
15+
16+
/// Create [`DataChunk`] from a list of column arrays.
17+
impl FromIterator<ArrayImpl> for DataChunk {
18+
fn from_iter<I: IntoIterator<Item = ArrayImpl>>(iter: I) -> Self {
19+
let arrays = iter.into_iter().collect::<Arc<[ArrayImpl]>>();
20+
assert!(!arrays.is_empty());
21+
let cardinality = arrays[0].len();
22+
assert!(
23+
arrays.iter().map(|a| a.len()).all(|l| l == cardinality),
24+
"all arrays must have the same length"
25+
);
26+
DataChunk { arrays }
27+
}
28+
}
29+
30+
impl DataChunk {
31+
/// Return a [`DataChunk`] with 1 `item` in 1 array.
32+
pub fn single(item: i32) -> Self {
33+
DataChunk {
34+
arrays: [ArrayImpl::Int32([item].into_iter().collect())]
35+
.into_iter()
36+
.collect(),
37+
}
38+
}
39+
40+
/// Return the number of rows in the chunk.
41+
pub fn cardinality(&self) -> usize {
42+
self.arrays[0].len()
43+
}
44+
45+
/// Get all arrays.
46+
pub fn arrays(&self) -> &[ArrayImpl] {
47+
&self.arrays
48+
}
49+
50+
/// Concatenate multiple chunks into one.
51+
pub fn concat(chunks: &[DataChunk]) -> Self {
52+
assert!(!chunks.is_empty(), "must concat at least one chunk");
53+
let mut builders = chunks[0]
54+
.arrays()
55+
.iter()
56+
.map(ArrayBuilderImpl::from_type_of_array)
57+
.collect_vec();
58+
for chunk in chunks {
59+
for (array, builder) in chunk.arrays.iter().zip(builders.iter_mut()) {
60+
builder.append(array);
61+
}
62+
}
63+
builders.into_iter().map(|b| b.finish()).collect()
64+
}
65+
}
66+
67+
/// Print the chunk as a pretty table.
68+
impl fmt::Display for DataChunk {
69+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
70+
use prettytable::{format, Table};
71+
let mut table = Table::new();
72+
table.set_format(*format::consts::FORMAT_NO_LINESEP_WITH_TITLE);
73+
for i in 0..self.cardinality() {
74+
let row = self.arrays.iter().map(|a| a.get(i).to_string()).collect();
75+
table.add_row(row);
76+
}
77+
write!(f, "{}", table)
78+
}
79+
}
80+
81+
impl fmt::Debug for DataChunk {
82+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
83+
write!(f, "{}", self)
84+
}
85+
}

code/03-00/src/array/iter.rs

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
use std::iter::Iterator;
2+
use std::marker::PhantomData;
3+
4+
use super::Array;
5+
6+
/// An iterator over the elements of an [`Array`].
7+
#[derive(Clone)]
8+
pub struct ArrayIter<'a, A: Array> {
9+
array: &'a A,
10+
index: usize,
11+
_phantom: PhantomData<&'a usize>,
12+
}
13+
14+
impl<'a, A: Array> ArrayIter<'a, A> {
15+
/// Create an iterator over array.
16+
pub fn new(array: &'a A) -> Self {
17+
Self {
18+
array,
19+
index: 0,
20+
_phantom: PhantomData,
21+
}
22+
}
23+
}
24+
25+
impl<'a, A: Array> Iterator for ArrayIter<'a, A> {
    /// Each step yields whatever [`Array::get`] returns for that position.
    type Item = Option<&'a A::Item>;

    /// Yield the element at the current position and advance, or return `None`
    /// once every position of the array has been visited.
    fn next(&mut self) -> Option<Self::Item> {
        if self.index >= self.array.len() {
            return None;
        }
        let item = self.array.get(self.index);
        self.index += 1;
        Some(item)
    }

    /// Report the exact number of elements left, so that consumers such as
    /// `collect` can reserve the right amount of space up front.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.array.len().saturating_sub(self.index);
        (remaining, Some(remaining))
    }
}

code/03-00/src/array/mod.rs

Lines changed: 244 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,244 @@
1+
//! In-memory representations of column values.
2+
3+
use std::convert::TryFrom;
4+
5+
use crate::types::{DataType, DataTypeKind, DataValue};
6+
7+
mod data_chunk;
8+
mod iter;
9+
mod primitive_array;
10+
mod utf8_array;
11+
12+
pub use self::data_chunk::*;
13+
pub use self::iter::ArrayIter;
14+
pub use self::primitive_array::*;
15+
pub use self::utf8_array::*;
16+
17+
/// A trait over all array builders.
18+
///
19+
/// [`ArrayBuilder`] is a trait over all builders. You could build an array with
20+
/// `push` with the help of [`ArrayBuilder`] trait. The `push` function always
21+
/// accepts reference to an element. e.g. for [`PrimitiveArray`],
22+
/// you must do `builder.push(Some(&1))`. For [`Utf8Array`], you must do
23+
/// `builder.push(Some("xxx"))`. Note that you don't need to construct a `String`.
24+
///
25+
/// The associated type `Array` is the type of the corresponding array. It is the
26+
/// return type of `finish`.
27+
pub trait ArrayBuilder: Send + Sync + 'static {
28+
/// Corresponding `Array` of this builder
29+
type Array: Array<Builder = Self>;
30+
31+
/// Create a new builder with `capacity`.
32+
fn with_capacity(capacity: usize) -> Self;
33+
34+
/// Append a value to builder.
35+
fn push(&mut self, value: Option<&<Self::Array as Array>::Item>);
36+
37+
/// Append an array to builder.
38+
fn append(&mut self, other: &Self::Array);
39+
40+
/// Finish build and return a new array.
41+
fn finish(self) -> Self::Array;
42+
}
43+
44+
/// A trait over all array.
45+
///
46+
/// [`Array`] must be built with an [`ArrayBuilder`]. The array trait provides several
47+
/// unified interface on an array, like `len`, `get` and `iter`.
48+
///
49+
/// The `Builder` associated type is the builder for this array.
50+
/// The `Item` is the item you could retrieve from this array.
51+
///
52+
/// For example, [`PrimitiveArray`] could return an `Option<&u32>`, and [`Utf8Array`] will
53+
/// return an `Option<&str>`.
54+
pub trait Array: Sized + Send + Sync + 'static {
55+
/// Corresponding builder of this array.
56+
type Builder: ArrayBuilder<Array = Self>;
57+
58+
/// Type of element in the array.
59+
type Item: ToOwned + ?Sized;
60+
61+
/// Retrieve a reference to value.
62+
fn get(&self, idx: usize) -> Option<&Self::Item>;
63+
64+
/// Number of items of array.
65+
fn len(&self) -> usize;
66+
67+
/// Get iterator of current array.
68+
fn iter(&self) -> ArrayIter<'_, Self> {
69+
ArrayIter::new(self)
70+
}
71+
72+
/// Check if the array has a length of 0.
73+
fn is_empty(&self) -> bool {
74+
self.len() == 0
75+
}
76+
}
77+
78+
pub type BoolArray = PrimitiveArray<bool>;
79+
pub type I32Array = PrimitiveArray<i32>;
80+
pub type F64Array = PrimitiveArray<f64>;
81+
82+
/// Embeds all types of arrays in `array` module.
83+
#[derive(Clone, PartialEq)]
84+
pub enum ArrayImpl {
85+
Bool(BoolArray),
86+
Int32(I32Array),
87+
Float64(F64Array),
88+
Utf8(Utf8Array),
89+
}
90+
91+
pub type BoolArrayBuilder = PrimitiveArrayBuilder<bool>;
92+
pub type I32ArrayBuilder = PrimitiveArrayBuilder<i32>;
93+
pub type F64ArrayBuilder = PrimitiveArrayBuilder<f64>;
94+
95+
/// Embeds all types of array builders in `array` module.
96+
pub enum ArrayBuilderImpl {
97+
Bool(BoolArrayBuilder),
98+
Int32(I32ArrayBuilder),
99+
Float64(F64ArrayBuilder),
100+
Utf8(Utf8ArrayBuilder),
101+
}
102+
103+
/// An error which can be returned when downcasting an [`ArrayImpl`] into a concrete type array.
///
/// It implements [`std::fmt::Display`] and [`std::error::Error`], so it can be
/// printed, boxed as `Box<dyn Error>`, and propagated with `?` like any other
/// error type.
#[derive(Debug, Clone)]
pub struct TypeMismatch;

impl std::fmt::Display for TypeMismatch {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("array type mismatch")
    }
}

impl std::error::Error for TypeMismatch {}
106+
107+
// Generates the conversions between one concrete array type and `ArrayImpl`:
// wrapping the array into the given variant, and unwrapping an owned or borrowed
// `ArrayImpl` back into the concrete array. Unwrapping fails with `TypeMismatch`
// when the value holds any other variant.
macro_rules! impl_into {
    ($array_ty:ty, $variant:ident) => {
        impl From<$array_ty> for ArrayImpl {
            fn from(array: $array_ty) -> Self {
                ArrayImpl::$variant(array)
            }
        }

        impl TryFrom<ArrayImpl> for $array_ty {
            type Error = TypeMismatch;

            fn try_from(array: ArrayImpl) -> Result<Self, Self::Error> {
                if let ArrayImpl::$variant(inner) = array {
                    Ok(inner)
                } else {
                    Err(TypeMismatch)
                }
            }
        }

        impl<'a> TryFrom<&'a ArrayImpl> for &'a $array_ty {
            type Error = TypeMismatch;

            fn try_from(array: &'a ArrayImpl) -> Result<Self, Self::Error> {
                if let ArrayImpl::$variant(inner) = array {
                    Ok(inner)
                } else {
                    Err(TypeMismatch)
                }
            }
        }
    };
}
138+
139+
impl_into! { PrimitiveArray<bool>, Bool }
140+
impl_into! { PrimitiveArray<i32>, Int32 }
141+
impl_into! { PrimitiveArray<f64>, Float64 }
142+
impl_into! { Utf8Array, Utf8 }
143+
144+
impl ArrayBuilderImpl {
145+
/// Create a new array builder from data type.
146+
pub fn with_capacity(capacity: usize, ty: &DataType) -> Self {
147+
match ty.kind() {
148+
DataTypeKind::Boolean => Self::Bool(BoolArrayBuilder::with_capacity(capacity)),
149+
DataTypeKind::Int(_) => Self::Int32(I32ArrayBuilder::with_capacity(capacity)),
150+
DataTypeKind::Float(_) | DataTypeKind::Double => {
151+
Self::Float64(F64ArrayBuilder::with_capacity(capacity))
152+
}
153+
DataTypeKind::Char(_) | DataTypeKind::Varchar(_) | DataTypeKind::String => {
154+
Self::Utf8(Utf8ArrayBuilder::with_capacity(capacity))
155+
}
156+
_ => panic!("unsupported data type"),
157+
}
158+
}
159+
160+
/// Create a new array builder with the same type of given array.
161+
pub fn from_type_of_array(array: &ArrayImpl) -> Self {
162+
match array {
163+
ArrayImpl::Bool(_) => Self::Bool(BoolArrayBuilder::with_capacity(0)),
164+
ArrayImpl::Int32(_) => Self::Int32(I32ArrayBuilder::with_capacity(0)),
165+
ArrayImpl::Float64(_) => Self::Float64(F64ArrayBuilder::with_capacity(0)),
166+
ArrayImpl::Utf8(_) => Self::Utf8(Utf8ArrayBuilder::with_capacity(0)),
167+
}
168+
}
169+
170+
/// Appends an element to the back of array.
171+
pub fn push(&mut self, v: &DataValue) {
172+
match (self, v) {
173+
(Self::Bool(a), DataValue::Bool(v)) => a.push(Some(v)),
174+
(Self::Int32(a), DataValue::Int32(v)) => a.push(Some(v)),
175+
(Self::Float64(a), DataValue::Float64(v)) => a.push(Some(v)),
176+
(Self::Utf8(a), DataValue::String(v)) => a.push(Some(v)),
177+
(Self::Bool(a), DataValue::Null) => a.push(None),
178+
(Self::Int32(a), DataValue::Null) => a.push(None),
179+
(Self::Float64(a), DataValue::Null) => a.push(None),
180+
(Self::Utf8(a), DataValue::Null) => a.push(None),
181+
_ => panic!("failed to push value: type mismatch"),
182+
}
183+
}
184+
185+
/// Appends a [`ArrayImpl`].
186+
pub fn append(&mut self, array_impl: &ArrayImpl) {
187+
match (self, array_impl) {
188+
(Self::Bool(builder), ArrayImpl::Bool(arr)) => builder.append(arr),
189+
(Self::Int32(builder), ArrayImpl::Int32(arr)) => builder.append(arr),
190+
(Self::Float64(builder), ArrayImpl::Float64(arr)) => builder.append(arr),
191+
(Self::Utf8(builder), ArrayImpl::Utf8(arr)) => builder.append(arr),
192+
_ => panic!("failed to push value: type mismatch"),
193+
}
194+
}
195+
196+
/// Finish build and return a new array.
197+
pub fn finish(self) -> ArrayImpl {
198+
match self {
199+
Self::Bool(a) => ArrayImpl::Bool(a.finish()),
200+
Self::Int32(a) => ArrayImpl::Int32(a.finish()),
201+
Self::Float64(a) => ArrayImpl::Float64(a.finish()),
202+
Self::Utf8(a) => ArrayImpl::Utf8(a.finish()),
203+
}
204+
}
205+
}
206+
207+
impl ArrayImpl {
208+
/// Get the value at the given index.
209+
pub fn get(&self, idx: usize) -> DataValue {
210+
match self {
211+
Self::Bool(a) => match a.get(idx) {
212+
Some(val) => DataValue::Bool(*val),
213+
None => DataValue::Null,
214+
},
215+
Self::Int32(a) => match a.get(idx) {
216+
Some(val) => DataValue::Int32(*val),
217+
None => DataValue::Null,
218+
},
219+
Self::Float64(a) => match a.get(idx) {
220+
Some(val) => DataValue::Float64(*val),
221+
None => DataValue::Null,
222+
},
223+
Self::Utf8(a) => match a.get(idx) {
224+
Some(val) => DataValue::String(val.to_string()),
225+
None => DataValue::Null,
226+
},
227+
}
228+
}
229+
230+
/// Number of items of array.
231+
pub fn len(&self) -> usize {
232+
match self {
233+
Self::Bool(a) => a.len(),
234+
Self::Int32(a) => a.len(),
235+
Self::Float64(a) => a.len(),
236+
Self::Utf8(a) => a.len(),
237+
}
238+
}
239+
240+
/// Check if array is empty.
241+
pub fn is_empty(&self) -> bool {
242+
self.len() == 0
243+
}
244+
}

0 commit comments

Comments
 (0)