
Commit 64347c0

storage part 1
Signed-off-by: Alex Chi <[email protected]>
1 parent 5331faa commit 64347c0

5 files changed (+128, -17 lines)

code/.gitignore

Lines changed: 1 addition & 0 deletions

@@ -1,2 +1,3 @@
 target
 Cargo.lock
+risinglight.db/

code/03-00/Cargo.toml

Lines changed: 2 additions & 1 deletion

@@ -7,6 +7,7 @@ edition = "2021"
 [dependencies]
 anyhow = "1"
 bitvec = "1.0"
+bytes = "1"
 enum_dispatch = "0.3"
 env_logger = "0.9"
 futures = { version = "0.3", default-features = false, features = ["alloc"] }
@@ -17,7 +18,7 @@ prettytable-rs = { version = "0.8", default-features = false }
 rustyline = "9"
 sqlparser = "0.13"
 thiserror = "1"
-tokio = { version = "1", features = ["rt", "rt-multi-thread", "sync", "macros"] }
+tokio = { version = "1", features = ["rt", "rt-multi-thread", "sync", "macros", "fs"] }
 tokio-stream = "0.1"
 
 [dev-dependencies]
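The new `bytes` crate supplies the `Buf`/`BufMut` traits used by the column codec later in this commit, and tokio's `fs` feature gates the async filesystem wrappers (`tokio::fs::create_dir_all`, `tokio::fs::write`, `tokio::fs::read`) that the storage code calls. A minimal standalone sketch of those calls; the path is illustrative only, since the real layout comes from `DiskTable::column_path` further down:

use std::io;

// Sketch only: exercises the tokio::fs calls enabled by the "fs" feature.
// The directory and file names below are made up for illustration.
#[tokio::main]
async fn main() -> io::Result<()> {
    // Create the table directory, then write a single 4-byte column file.
    tokio::fs::create_dir_all("risinglight.db/0").await?;
    tokio::fs::write("risinglight.db/0/0.col", 1i32.to_le_bytes()).await?;
    // Read it back.
    let data = tokio::fs::read("risinglight.db/0/0.col").await?;
    assert_eq!(data.len(), 4);
    Ok(())
}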

code/03-00/src/executor/create.rs

Lines changed: 6 additions & 2 deletions

@@ -15,11 +15,15 @@ impl CreateTableExecutor {
         let schema = self.catalog.get_schema(self.plan.schema_id).unwrap();
         let table_id = schema.add_table(&self.plan.table_name).unwrap();
         let table = schema.get_table(table_id).unwrap();
+        let mut column_descs = vec![];
         for (name, desc) in &self.plan.columns {
             table.add_column(name, desc.clone()).unwrap();
+            column_descs.push(desc.clone());
         }
-        self.storage
-            .add_table(TableRefId::new(self.plan.schema_id, table_id))?;
+        self.storage.add_table(
+            TableRefId::new(self.plan.schema_id, table_id),
+            &column_descs,
+        )?;
         yield DataChunk::single(1);
     }
 }
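The executor now collects the plan's column descriptors and hands them to the storage layer together with the table id; the storage-side implementation appears in the next file's diff. A hedged sketch of that call shape in isolation, assuming a `column_descs` slice built elsewhere (`register_table` is a hypothetical helper, not part of this commit):

// Sketch only: DiskStorage, TableRefId, ColumnDesc and StorageResult are
// the types used in this commit; `column_descs` is assumed to exist already.
fn register_table(
    storage: &DiskStorage,
    id: TableRefId,
    column_descs: &[ColumnDesc],
) -> StorageResult<()> {
    // Errors with "table already exists: ..." if the id was registered before.
    storage.add_table(id, column_descs)?;
    let _table = storage.get_table(id)?;
    Ok(())
}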

code/03-00/src/storage/mod.rs

Lines changed: 105 additions & 14 deletions

@@ -1,9 +1,14 @@
 //! On-disk storage
 
-use std::sync::Arc;
+use std::collections::HashMap;
+use std::path::PathBuf;
+use std::sync::{Arc, RwLock};
 
-use crate::array::DataChunk;
-use crate::catalog::TableRefId;
+use anyhow::anyhow;
+use bytes::{Buf, BufMut};
+
+use crate::array::{Array, ArrayBuilder, ArrayImpl, DataChunk, I32Array, I32ArrayBuilder};
+use crate::catalog::{ColumnDesc, TableRefId};
 
 /// The error type of storage operations.
 #[derive(thiserror::Error, Debug)]
@@ -17,13 +22,33 @@ pub type StorageRef = Arc<DiskStorage>;
 pub type StorageTableRef = Arc<DiskTable>;
 
 /// On-disk storage.
-#[derive(Clone)]
-pub struct DiskStorage;
+pub struct DiskStorage {
+    /// All tables in the current storage engine.
+    tables: RwLock<HashMap<TableRefId, StorageTableRef>>,
+
+    /// The storage options.
+    options: Arc<StorageOptions>,
+}
+
+pub struct StorageOptions {
+    /// The directory of the storage
+    base_path: PathBuf,
+}
+
+pub fn err(error: impl Into<anyhow::Error>) -> StorageError {
+    StorageError(error.into())
+}
 
 /// An on-disk table.
 pub struct DiskTable {
-    #[allow(dead_code)]
+    /// Id of the table.
     id: TableRefId,
+
+    /// Columns of the current table.
+    column_descs: Arc<[ColumnDesc]>,
+
+    /// The storage options.
+    options: Arc<StorageOptions>,
 }
 
 impl Default for DiskStorage {
@@ -35,28 +60,94 @@ impl Default for DiskStorage {
 impl DiskStorage {
     /// Create a new in-memory storage.
     pub fn new() -> Self {
-        DiskStorage
+        DiskStorage {
+            tables: RwLock::new(HashMap::new()),
+            options: Arc::new(StorageOptions {
+                base_path: "risinglight.db".into(),
+            }),
+        }
     }
 
     /// Add a table.
-    pub fn add_table(&self, _id: TableRefId) -> StorageResult<()> {
-        todo!()
+    pub fn add_table(&self, id: TableRefId, column_descs: &[ColumnDesc]) -> StorageResult<()> {
+        let mut tables = self.tables.write().unwrap();
+        let table = DiskTable {
+            id,
+            options: self.options.clone(),
+            column_descs: column_descs.into(),
+        };
+        let res = tables.insert(id, table.into());
+        if res.is_some() {
+            return Err(anyhow!("table already exists: {:?}", id).into());
+        }
+        Ok(())
     }
 
     /// Get a table.
-    pub fn get_table(&self, _id: TableRefId) -> StorageResult<StorageTableRef> {
-        todo!()
+    pub fn get_table(&self, id: TableRefId) -> StorageResult<StorageTableRef> {
+        let tables = self.tables.read().unwrap();
+        tables
+            .get(&id)
+            .ok_or_else(|| anyhow!("table not found: {:?}", id).into())
+            .cloned()
     }
 }
 
+/// Encode an `I32Array` into a `Vec<u8>`.
+fn encode_int32_column(a: &I32Array) -> StorageResult<Vec<u8>> {
+    let mut buffer = Vec::with_capacity(a.len() * 4);
+    for item in a.iter() {
+        if let Some(item) = item {
+            buffer.put_i32_le(*item);
+        } else {
+            return Err(anyhow!("nullable encoding not supported!").into());
+        }
+    }
+    Ok(buffer)
+}
+
+fn decode_int32_column(mut data: &[u8]) -> StorageResult<I32Array> {
+    let mut builder = I32ArrayBuilder::with_capacity(data.len() / 4);
+    while data.has_remaining() {
+        builder.push(Some(&data.get_i32_le()));
+    }
+    Ok(builder.finish())
+}
+
 impl DiskTable {
+    fn table_path(&self) -> PathBuf {
+        self.options.base_path.join(self.id.table_id.to_string())
+    }
+
+    fn column_path(&self, column_id: usize) -> PathBuf {
+        self.table_path().join(format!("{}.col", column_id))
+    }
+
     /// Append a chunk to the table.
-    pub async fn append(&self, _chunk: DataChunk) -> StorageResult<()> {
-        todo!()
+    pub async fn append(&self, chunk: DataChunk) -> StorageResult<()> {
+        for (idx, column) in chunk.arrays().iter().enumerate() {
+            if let ArrayImpl::Int32(column) = column {
+                let column_path = self.column_path(idx);
+                let data = encode_int32_column(column)?;
+                tokio::fs::create_dir_all(column_path.parent().unwrap())
+                    .await
+                    .map_err(err)?;
+                tokio::fs::write(column_path, data).await.map_err(err)?;
+            } else {
+                return Err(anyhow!("unsupported column type").into());
+            }
+        }
+        Ok(())
     }
 
     /// Get all chunks of the table.
     pub async fn all_chunks(&self) -> StorageResult<Vec<DataChunk>> {
-        todo!()
+        let mut columns = vec![];
+        for (idx, _) in self.column_descs.iter().enumerate() {
+            let column_path = self.column_path(idx);
+            let data = tokio::fs::read(column_path).await.map_err(err)?;
+            columns.push(decode_int32_column(&data)?);
+        }
+        Ok(vec![columns.into_iter().map(ArrayImpl::Int32).collect()])
     }
 }

code/sql/03-01.slt

Lines changed: 14 additions & 0 deletions

@@ -0,0 +1,14 @@
+# 03-01: Very simple storage test
+
+statement ok
+CREATE TABLE t (a INT NOT NULL, b INT NOT NULL, c INT NOT NULL)
+
+statement ok
+INSERT INTO t VALUES (1,10,100), (2,20,200), (3,30,300)
+
+query III
+SELECT * FROM t
+----
+1 10 100
+2 20 200
+3 30 300
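With the code in this commit, the INSERT above should leave one file per column under the storage directory. The layout below is a sketch derived from `DiskTable::table_path` and `column_path`; the table id `0` is an assumption about the id the catalog assigns to the first table, and each `.col` file holds three little-endian i32 values (12 bytes):

risinglight.db/
└── 0/          table directory, named after TableRefId::table_id
    ├── 0.col   column a
    ├── 1.col   column b
    └── 2.col   column c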
