Commit 75b8afe

feat: compaction integration (#997)
* feat: trigger compaction on flush
* chore: rebase develop
* feat: add config item max_file_in_level0 and remove compaction_after_flush
* fix: cr comments
* chore: add unit test to cover Timestamp::new_inclusive
* fix: workaround to fix future is not Sync
* fix: future is not sync
* fix: some cr comments
1 parent e2904b9 commit 75b8afe

30 files changed (+515, -196 lines)

config/datanode.example.toml

Lines changed: 4 additions & 0 deletions
@@ -24,3 +24,7 @@ metasrv_addrs = ['127.0.0.1:3002']
 timeout_millis = 3000
 connect_timeout_millis = 5000
 tcp_nodelay = false
+
+[compaction]
+max_inflight_task = 4
+max_file_in_level0 = 16
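The two new keys map onto the CompactionConfig struct added to src/datanode/src/datanode.rs further down in this commit. A rough, illustrative sketch of how such a section is picked up, using simplified stand-in structs (the real DatanodeOptions carries many more fields; the toml and serde crates are assumed as dependencies):

use serde::Deserialize;

// Simplified stand-in for datanode::datanode::CompactionConfig (illustrative only).
#[derive(Debug, Deserialize, PartialEq)]
#[serde(default)]
struct CompactionConfig {
    max_inflight_task: usize,
    max_file_in_level0: usize,
}

impl Default for CompactionConfig {
    fn default() -> Self {
        // Code defaults from the real Default impl below: 4 tasks, 8 level-0 files.
        Self {
            max_inflight_task: 4,
            max_file_in_level0: 8,
        }
    }
}

// Simplified stand-in for DatanodeOptions, which uses #[serde(default)] the same way.
#[derive(Debug, Default, Deserialize)]
#[serde(default)]
struct Options {
    compaction: CompactionConfig,
}

fn main() {
    let config = "[compaction]\nmax_inflight_task = 4\nmax_file_in_level0 = 16\n";
    let opts: Options = toml::from_str(config).unwrap();
    assert_eq!(16, opts.compaction.max_file_in_level0);

    // Omitting the whole section keeps the code defaults (4 and 8).
    let opts: Options = toml::from_str("").unwrap();
    assert_eq!(8, opts.compaction.max_file_in_level0);
}

Note that the example file sets max_file_in_level0 to 16 while the code default is 8; the assertion in src/cmd/src/datanode.rs below checks the 16 coming from the example config.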

src/catalog/src/system.rs

Lines changed: 3 additions & 0 deletions
@@ -399,6 +399,7 @@ mod tests {
     use mito::config::EngineConfig;
     use mito::engine::MitoEngine;
     use object_store::ObjectStore;
+    use storage::compaction::noop::NoopCompactionScheduler;
     use storage::config::EngineConfig as StorageEngineConfig;
     use storage::EngineImpl;
     use table::metadata::TableType;
@@ -485,12 +486,14 @@ mod tests {
             .build()
             .unwrap();
         let object_store = ObjectStore::new(accessor);
+        let noop_compaction_scheduler = Arc::new(NoopCompactionScheduler::default());
         let table_engine = Arc::new(MitoEngine::new(
             EngineConfig::default(),
             EngineImpl::new(
                 StorageEngineConfig::default(),
                 Arc::new(NoopLogStore::default()),
                 object_store.clone(),
+                noop_compaction_scheduler,
             ),
             object_store,
         ));

src/cmd/src/datanode.rs

Lines changed: 9 additions & 1 deletion
@@ -143,7 +143,7 @@ mod tests {
     use std::assert_matches::assert_matches;
     use std::time::Duration;
 
-    use datanode::datanode::ObjectStoreConfig;
+    use datanode::datanode::{CompactionConfig, ObjectStoreConfig};
     use servers::Mode;
 
     use super::*;
@@ -181,6 +181,14 @@ mod tests {
             ObjectStoreConfig::S3 { .. } => unreachable!(),
             ObjectStoreConfig::Oss { .. } => unreachable!(),
         };
+
+        assert_eq!(
+            CompactionConfig {
+                max_inflight_task: 4,
+                max_file_in_level0: 16,
+            },
+            options.compaction
+        );
     }
 
     #[test]

src/common/time/src/range.rs

Lines changed: 26 additions & 1 deletion
@@ -205,7 +205,7 @@ impl TimestampRange {
     pub fn new_inclusive(start: Option<Timestamp>, end: Option<Timestamp>) -> Self {
         // check for emptiness
         if let (Some(start_ts), Some(end_ts)) = (start, end) {
-            if start_ts >= end_ts {
+            if start_ts > end_ts {
                 return Self::empty();
             }
         }
@@ -462,4 +462,29 @@ mod tests {
 
         assert!(!full.intersects(&empty));
     }
+
+    #[test]
+    fn test_new_inclusive() {
+        let range = TimestampRange::new_inclusive(
+            Some(Timestamp::new_millisecond(1)),
+            Some(Timestamp::new_millisecond(3)),
+        );
+        assert!(!range.is_empty());
+        assert!(range.contains(&Timestamp::new_millisecond(1)));
+        assert!(range.contains(&Timestamp::new_millisecond(3)));
+
+        let range = TimestampRange::new_inclusive(
+            Some(Timestamp::new_millisecond(1)),
+            Some(Timestamp::new_millisecond(1)),
+        );
+        assert!(!range.is_empty());
+        assert_eq!(1, range.start.unwrap().value());
+        assert!(range.contains(&Timestamp::new_millisecond(1)));
+
+        let range = TimestampRange::new_inclusive(
+            Some(Timestamp::new_millisecond(2)),
+            Some(Timestamp::new_millisecond(1)),
+        );
+        assert!(range.is_empty());
+    }
 }
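The `>=` to `>` change in new_inclusive above is the behavioral fix the new unit test covers: with inclusive bounds, a range whose start equals its end denotes a single point, not an empty range. A minimal standalone sketch of the rule (illustrative only, not the crate's actual representation):

// With inclusive bounds [start, end], the range is empty only when start > end;
// start == end still contains exactly one point.
fn inclusive_is_empty(start: i64, end: i64) -> bool {
    start > end
}

fn main() {
    assert!(!inclusive_is_empty(1, 3)); // [1, 3] contains 1, 2 and 3
    assert!(!inclusive_is_empty(1, 1)); // [1, 1] contains exactly 1
    assert!(inclusive_is_empty(2, 1)); // start is past end: empty
}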

src/datanode/src/datanode.rs

Lines changed: 38 additions & 0 deletions
@@ -20,6 +20,8 @@ use common_telemetry::info;
 use meta_client::MetaClientOpts;
 use serde::{Deserialize, Serialize};
 use servers::Mode;
+use storage::compaction::CompactionSchedulerConfig;
+use storage::config::EngineConfig as StorageEngineConfig;
 
 use crate::error::Result;
 use crate::instance::{Instance, InstanceRef};
@@ -104,6 +106,40 @@ impl Default for WalConfig {
     }
 }
 
+/// Options for table compaction
+#[derive(Debug, Clone, Serialize, Deserialize, Eq, PartialEq)]
+pub struct CompactionConfig {
+    /// Max task number that can concurrently run.
+    pub max_inflight_task: usize,
+    /// Max files in level 0 to trigger compaction.
+    pub max_file_in_level0: usize,
+}
+
+impl Default for CompactionConfig {
+    fn default() -> Self {
+        Self {
+            max_inflight_task: 4,
+            max_file_in_level0: 8,
+        }
+    }
+}
+
+impl From<&DatanodeOptions> for CompactionSchedulerConfig {
+    fn from(value: &DatanodeOptions) -> Self {
+        Self {
+            max_inflight_task: value.compaction.max_inflight_task,
+        }
+    }
+}
+
+impl From<&DatanodeOptions> for StorageEngineConfig {
+    fn from(value: &DatanodeOptions) -> Self {
+        Self {
+            max_files_in_l0: value.compaction.max_file_in_level0,
+        }
+    }
+}
+
 #[derive(Clone, Debug, Serialize, Deserialize)]
 #[serde(default)]
 pub struct DatanodeOptions {
@@ -117,6 +153,7 @@ pub struct DatanodeOptions {
     pub wal: WalConfig,
     pub storage: ObjectStoreConfig,
     pub enable_memory_catalog: bool,
+    pub compaction: CompactionConfig,
     pub mode: Mode,
 }
 
@@ -133,6 +170,7 @@ impl Default for DatanodeOptions {
             wal: WalConfig::default(),
             storage: ObjectStoreConfig::default(),
            enable_memory_catalog: false,
+            compaction: CompactionConfig::default(),
             mode: Mode::Standalone,
         }
     }
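The two From<&DatanodeOptions> impls fan the single [compaction] section out to its two consumers: CompactionSchedulerConfig takes max_inflight_task, while the storage EngineConfig takes max_file_in_level0 as max_files_in_l0. A self-contained sketch of the same fan-out with stand-in types, purely to show how the values flow (the real conversions are exactly the impls above):

// Stand-ins for the real option structs (illustrative only).
struct CompactionConfig {
    max_inflight_task: usize,
    max_file_in_level0: usize,
}
struct DatanodeOptions {
    compaction: CompactionConfig,
}
struct CompactionSchedulerConfig {
    max_inflight_task: usize,
}
struct StorageEngineConfig {
    max_files_in_l0: usize,
}

impl From<&DatanodeOptions> for CompactionSchedulerConfig {
    fn from(value: &DatanodeOptions) -> Self {
        Self { max_inflight_task: value.compaction.max_inflight_task }
    }
}

impl From<&DatanodeOptions> for StorageEngineConfig {
    fn from(value: &DatanodeOptions) -> Self {
        Self { max_files_in_l0: value.compaction.max_file_in_level0 }
    }
}

fn main() {
    // Values as in config/datanode.example.toml above.
    let opts = DatanodeOptions {
        compaction: CompactionConfig { max_inflight_task: 4, max_file_in_level0: 16 },
    };
    assert_eq!(4, CompactionSchedulerConfig::from(&opts).max_inflight_task);
    assert_eq!(16, StorageEngineConfig::from(&opts).max_files_in_l0);
}

src/datanode/src/instance.rs below consumes both conversions when it builds the storage engine and the local compaction scheduler.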

src/datanode/src/instance.rs

Lines changed: 15 additions & 1 deletion
@@ -38,8 +38,12 @@ use object_store::{util, ObjectStore};
 use query::query_engine::{QueryEngineFactory, QueryEngineRef};
 use servers::Mode;
 use snafu::prelude::*;
+use storage::compaction::{
+    CompactionSchedulerConfig, CompactionSchedulerRef, LocalCompactionScheduler, SimplePicker,
+};
 use storage::config::EngineConfig as StorageEngineConfig;
 use storage::EngineImpl;
+use store_api::logstore::LogStore;
 use table::table::numbers::NumbersTable;
 use table::table::TableIdProviderRef;
 use table::Table;
@@ -92,12 +96,15 @@ impl Instance {
             }
         };
 
+        let compaction_scheduler = create_compaction_scheduler(opts);
+
         let table_engine = Arc::new(DefaultEngine::new(
             TableEngineConfig::default(),
             EngineImpl::new(
-                StorageEngineConfig::default(),
+                StorageEngineConfig::from(opts),
                 logstore.clone(),
                 object_store.clone(),
+                compaction_scheduler,
             ),
             object_store,
         ));
@@ -204,6 +211,13 @@ impl Instance {
     }
 }
 
+fn create_compaction_scheduler<S: LogStore>(opts: &DatanodeOptions) -> CompactionSchedulerRef<S> {
+    let picker = SimplePicker::default();
+    let config = CompactionSchedulerConfig::from(opts);
+    let scheduler = LocalCompactionScheduler::new(config, picker);
+    Arc::new(scheduler)
+}
+
 pub(crate) async fn new_object_store(store_config: &ObjectStoreConfig) -> Result<ObjectStore> {
     let object_store = match store_config {
         ObjectStoreConfig::File { .. } => new_fs_object_store(store_config).await,

src/datanode/src/mock.rs

Lines changed: 3 additions & 0 deletions
@@ -24,6 +24,7 @@ use mito::config::EngineConfig as TableEngineConfig;
 use query::QueryEngineFactory;
 use servers::Mode;
 use snafu::ResultExt;
+use storage::compaction::noop::NoopCompactionScheduler;
 use storage::config::EngineConfig as StorageEngineConfig;
 use storage::EngineImpl;
 use table::metadata::TableId;
@@ -46,12 +47,14 @@ impl Instance {
         let object_store = new_object_store(&opts.storage).await?;
         let logstore = Arc::new(create_log_store(&opts.wal).await?);
         let meta_client = Arc::new(mock_meta_client(meta_srv, opts.node_id.unwrap_or(42)).await);
+        let compaction_scheduler = Arc::new(NoopCompactionScheduler::default());
         let table_engine = Arc::new(DefaultEngine::new(
             TableEngineConfig::default(),
             EngineImpl::new(
                 StorageEngineConfig::default(),
                 logstore.clone(),
                 object_store.clone(),
+                compaction_scheduler,
             ),
             object_store,
         ));

src/datanode/src/sql.rs

Lines changed: 3 additions & 1 deletion
@@ -150,6 +150,7 @@ mod tests {
     use query::parser::{QueryLanguageParser, QueryStatement};
     use query::QueryEngineFactory;
     use sql::statements::statement::Statement;
+    use storage::compaction::noop::NoopCompactionScheduler;
     use storage::config::EngineConfig as StorageEngineConfig;
     use storage::EngineImpl;
     use table::engine::TableReference;
@@ -209,7 +210,7 @@ mod tests {
         let store_dir = dir.path().to_string_lossy();
         let accessor = Builder::default().root(&store_dir).build().unwrap();
         let object_store = ObjectStore::new(accessor);
-
+        let compaction_scheduler = Arc::new(NoopCompactionScheduler::default());
         let sql = r#"insert into demo(host, cpu, memory, ts) values
                            ('host1', 66.6, 1024, 1655276557000),
                            ('host2', 88.8, 333.3, 1655276558000)
@@ -221,6 +222,7 @@ mod tests {
                 StorageEngineConfig::default(),
                 Arc::new(NoopLogStore::default()),
                 object_store.clone(),
+                compaction_scheduler,
             ),
             object_store,
         ));

src/mito/src/engine.rs

Lines changed: 3 additions & 1 deletion
@@ -605,6 +605,7 @@ mod tests {
         Float64Vector, Int32Vector, StringVector, TimestampMillisecondVector, VectorRef,
     };
     use log_store::NoopLogStore;
+    use storage::compaction::noop::NoopCompactionScheduler;
     use storage::config::EngineConfig as StorageEngineConfig;
     use storage::region::RegionImpl;
     use storage::EngineImpl;
@@ -643,13 +644,14 @@ mod tests {
 
         let (dir, object_store) =
             test_util::new_test_object_store("test_insert_with_column_default_constraint").await;
-
+        let compaction_scheduler = Arc::new(NoopCompactionScheduler::default());
         let table_engine = MitoEngine::new(
             EngineConfig::default(),
             EngineImpl::new(
                 StorageEngineConfig::default(),
                 Arc::new(NoopLogStore::default()),
                 object_store.clone(),
+                compaction_scheduler,
             ),
             object_store,
         );

src/mito/src/table/test_util.rs

Lines changed: 3 additions & 1 deletion
@@ -23,6 +23,7 @@ use datatypes::vectors::VectorRef;
 use log_store::NoopLogStore;
 use object_store::services::fs::Builder;
 use object_store::ObjectStore;
+use storage::compaction::noop::NoopCompactionScheduler;
 use storage::config::EngineConfig as StorageEngineConfig;
 use storage::EngineImpl;
 use table::engine::{EngineContext, TableEngine};
@@ -127,11 +128,12 @@ pub struct TestEngineComponents {
 
 pub async fn setup_test_engine_and_table() -> TestEngineComponents {
     let (dir, object_store) = new_test_object_store("setup_test_engine_and_table").await;
-
+    let compaction_scheduler = Arc::new(NoopCompactionScheduler::default());
     let storage_engine = EngineImpl::new(
         StorageEngineConfig::default(),
         Arc::new(NoopLogStore::default()),
         object_store.clone(),
+        compaction_scheduler,
     );
     let table_engine = MitoEngine::new(
         EngineConfig::default(),
