Skip to content

Commit a07db9f

Browse files
tonyxuqqi and Qi Xu authored
server: make gc support multi-threads (tikv#16096)
Close tikv#16101: run region GC in parallel and expose the GC thread-count configuration, which can be updated dynamically. Signed-off-by: Qi Xu <[email protected]> Co-authored-by: Qi Xu <[email protected]>
1 parent 462a597 commit a07db9f

File tree

7 files changed

+246
-65
lines changed

7 files changed

+246
-65
lines changed

components/tikv_util/src/worker/pool.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,14 @@ impl<T: Display + Send + 'static> LazyWorker<T> {
224224
pub fn remote(&self) -> Remote<yatp::task::future::TaskCell> {
225225
self.worker.remote()
226226
}
227+
228+
pub fn pool_size(&self) -> usize {
229+
self.worker.pool_size()
230+
}
231+
232+
pub fn pool(&self) -> FuturePool {
233+
self.worker.pool()
234+
}
227235
}
228236

229237
pub struct ReceiverWrapper<T: Display + Send> {
@@ -448,6 +456,14 @@ impl Worker {
448456
self.pool.remote().clone()
449457
}
450458

459+
pub fn pool_size(&self) -> usize {
460+
self.pool.get_pool_size()
461+
}
462+
463+
pub fn pool(&self) -> FuturePool {
464+
self.pool.clone()
465+
}
466+
451467
fn start_impl<R: Runnable + 'static>(
452468
&self,
453469
runner: R,

src/server/gc_worker/compaction_filter.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -888,7 +888,7 @@ pub mod test_utils {
888888
cfg.ratio_threshold = ratio_threshold;
889889
}
890890
cfg.enable_compaction_filter = true;
891-
GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg)))
891+
GcWorkerConfigManager(Arc::new(VersionTrack::new(cfg)), None)
892892
};
893893
let feature_gate = {
894894
let feature_gate = FeatureGate::default();

src/server/gc_worker/config.rs

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
use std::sync::Arc;
44

55
use online_config::{ConfigChange, ConfigManager, OnlineConfig};
6-
use tikv_util::config::{ReadableSize, VersionTrack};
6+
use tikv_util::{
7+
config::{ReadableSize, VersionTrack},
8+
yatp_pool::FuturePool,
9+
};
710

811
const DEFAULT_GC_RATIO_THRESHOLD: f64 = 1.1;
912
pub const DEFAULT_GC_BATCH_KEYS: usize = 512;
@@ -22,6 +25,8 @@ pub struct GcConfig {
2225
/// greater than 5.0.0. Change `compaction_filter_skip_version_check`
2326
/// can enable it by force.
2427
pub compaction_filter_skip_version_check: bool,
28+
/// gc threads count
29+
pub num_threads: usize,
2530
}
2631

2732
impl Default for GcConfig {
@@ -32,6 +37,7 @@ impl Default for GcConfig {
3237
max_write_bytes_per_sec: ReadableSize(DEFAULT_GC_MAX_WRITE_BYTES_PER_SEC),
3338
enable_compaction_filter: true,
3439
compaction_filter_skip_version_check: false,
40+
num_threads: 1,
3541
}
3642
}
3743
}
@@ -41,12 +47,15 @@ impl GcConfig {
4147
if self.batch_keys == 0 {
4248
return Err("gc.batch_keys should not be 0".into());
4349
}
50+
if self.num_threads == 0 {
51+
return Err("gc.thread_count should not be 0".into());
52+
}
4453
Ok(())
4554
}
4655
}
4756

4857
#[derive(Clone, Default)]
49-
pub struct GcWorkerConfigManager(pub Arc<VersionTrack<GcConfig>>);
58+
pub struct GcWorkerConfigManager(pub Arc<VersionTrack<GcConfig>>, pub Option<FuturePool>);
5059

5160
impl ConfigManager for GcWorkerConfigManager {
5261
fn dispatch(
@@ -55,6 +64,16 @@ impl ConfigManager for GcWorkerConfigManager {
5564
) -> std::result::Result<(), Box<dyn std::error::Error>> {
5665
{
5766
let change = change.clone();
67+
if let Some(pool) = self.1.as_ref() {
68+
if let Some(v) = change.get("num_threads") {
69+
let pool_size: usize = v.into();
70+
pool.scale_pool_size(pool_size);
71+
info!(
72+
"GC worker thread count is changed";
73+
"new_thread_count" => pool_size,
74+
);
75+
}
76+
}
5877
self.0
5978
.update(move |cfg: &mut GcConfig| cfg.update(change))?;
6079
}

src/server/gc_worker/gc_manager.rs

Lines changed: 76 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use std::{
44
cmp::Ordering,
55
sync::{
66
atomic::{AtomicU64, Ordering as AtomicOrdering},
7-
mpsc, Arc,
7+
mpsc, Arc, Condvar, Mutex,
88
},
99
thread::{self, Builder as ThreadBuilder, JoinHandle},
1010
time::Duration,
@@ -20,10 +20,10 @@ use txn_types::{Key, TimeStamp};
2020
use super::{
2121
compaction_filter::is_compaction_filter_allowed,
2222
config::GcWorkerConfigManager,
23-
gc_worker::{sync_gc, GcSafePointProvider, GcTask},
23+
gc_worker::{schedule_gc, GcSafePointProvider, GcTask},
2424
Result,
2525
};
26-
use crate::{server::metrics::*, tikv_util::sys::thread::StdThreadBuildWrapper};
26+
use crate::{server::metrics::*, storage::Callback, tikv_util::sys::thread::StdThreadBuildWrapper};
2727

2828
const POLL_SAFE_POINT_INTERVAL_SECS: u64 = 10;
2929

@@ -245,6 +245,8 @@ pub(super) struct GcManager<S: GcSafePointProvider, R: RegionInfoProvider, E: Kv
245245

246246
cfg_tracker: GcWorkerConfigManager,
247247
feature_gate: FeatureGate,
248+
249+
max_concurrent_tasks: usize,
248250
}
249251

250252
impl<S: GcSafePointProvider, R: RegionInfoProvider + 'static, E: KvEngine> GcManager<S, R, E> {
@@ -254,6 +256,7 @@ impl<S: GcSafePointProvider, R: RegionInfoProvider + 'static, E: KvEngine> GcMan
254256
worker_scheduler: Scheduler<GcTask<E>>,
255257
cfg_tracker: GcWorkerConfigManager,
256258
feature_gate: FeatureGate,
259+
concurrent_tasks: usize,
257260
) -> GcManager<S, R, E> {
258261
GcManager {
259262
cfg,
@@ -263,6 +266,7 @@ impl<S: GcSafePointProvider, R: RegionInfoProvider + 'static, E: KvEngine> GcMan
263266
gc_manager_ctx: GcManagerContext::new(),
264267
cfg_tracker,
265268
feature_gate,
269+
max_concurrent_tasks: concurrent_tasks,
266270
}
267271
}
268272

@@ -442,13 +446,27 @@ impl<S: GcSafePointProvider, R: RegionInfoProvider + 'static, E: KvEngine> GcMan
442446
let mut progress = Some(Key::from_encoded(BEGIN_KEY.to_vec()));
443447

444448
// Records how many region we have GC-ed.
445-
let mut processed_regions = 0;
449+
let mut scheduled_regions = 0;
450+
let task_controller = Arc::new((Mutex::new(0), Condvar::new()));
451+
// the task_controller is the <mutex,Condvar> combination to control the number
452+
// of tasks The mutex is used for protecting the number of current
453+
// tasks, while the condvar is used for notifying/get notified when the
454+
// number of current tasks is changed.
455+
let (lock, cvar) = &*task_controller;
456+
let maybe_wait = |max_tasks| {
457+
let mut current_tasks: std::sync::MutexGuard<'_, usize> = lock.lock().unwrap();
458+
while *current_tasks > max_tasks {
459+
// Wait until the number of current tasks is below the limit
460+
current_tasks = cvar.wait(current_tasks).unwrap();
461+
}
462+
};
446463

447464
info!("gc_worker: auto gc starts"; "safe_point" => self.curr_safe_point());
448465

449466
// The following loop iterates all regions whose leader is on this TiKV and does
450467
// GC on them. At the same time, check whether safe_point is updated
451468
// periodically. If it's updated, rewinding will happen.
469+
452470
loop {
453471
self.gc_manager_ctx.check_stopped()?;
454472
if is_compaction_filter_allowed(&self.cfg_tracker.value(), &self.feature_gate) {
@@ -462,9 +480,9 @@ impl<S: GcSafePointProvider, R: RegionInfoProvider + 'static, E: KvEngine> GcMan
462480
// We have worked to the end and we need to rewind. Restart from beginning.
463481
progress = Some(Key::from_encoded(BEGIN_KEY.to_vec()));
464482
need_rewind = false;
465-
info!("gc_worker: auto gc rewinds"; "processed_regions" => processed_regions);
483+
info!("gc_worker: auto gc rewinds"; "scheduled_regions" => scheduled_regions);
466484

467-
processed_regions = 0;
485+
scheduled_regions = 0;
468486
// Set the metric to zero to show that rewinding has happened.
469487
AUTO_GC_PROCESSED_REGIONS_GAUGE_VEC
470488
.with_label_values(&[PROCESS_TYPE_GC])
@@ -483,19 +501,40 @@ impl<S: GcSafePointProvider, R: RegionInfoProvider + 'static, E: KvEngine> GcMan
483501
if finished {
484502
// We have worked to the end of the TiKV or our progress has reached `end`, and
485503
// we don't need to rewind. In this case, the round of GC has finished.
486-
info!("gc_worker: auto gc finishes"; "processed_regions" => processed_regions);
487-
return Ok(());
504+
info!("gc_worker: all regions task are scheduled";
505+
"processed_regions" => scheduled_regions,
506+
);
507+
break;
488508
}
489509
}
490-
491510
assert!(progress.is_some());
492511

493512
// Before doing GC, check whether safe_point is updated periodically to
494513
// determine if rewinding is needed.
495514
self.check_if_need_rewind(&progress, &mut need_rewind, &mut end);
496515

497-
progress = self.gc_next_region(progress.unwrap(), &mut processed_regions)?;
516+
let controller: Arc<(Mutex<usize>, Condvar)> = Arc::clone(&task_controller);
517+
let cb = Box::new(move |_res| {
518+
let (lock, cvar) = &*controller;
519+
let mut current_tasks = lock.lock().unwrap();
520+
*current_tasks -= 1;
521+
cvar.notify_one();
522+
AUTO_GC_PROCESSED_REGIONS_GAUGE_VEC
523+
.with_label_values(&[PROCESS_TYPE_GC])
524+
.inc();
525+
});
526+
maybe_wait(self.max_concurrent_tasks - 1);
527+
let mut current_tasks = lock.lock().unwrap();
528+
progress = self.async_gc_next_region(progress.unwrap(), cb, &mut current_tasks)?;
529+
scheduled_regions += 1;
498530
}
531+
532+
// wait for all tasks finished
533+
self.gc_manager_ctx.check_stopped()?;
534+
maybe_wait(0);
535+
info!("gc_worker: auto gc finishes"; "processed_regions" => scheduled_regions);
536+
537+
Ok(())
499538
}
500539

501540
/// Checks whether we need to rewind in this round of GC. Only used in
@@ -536,13 +575,14 @@ impl<S: GcSafePointProvider, R: RegionInfoProvider + 'static, E: KvEngine> GcMan
536575
}
537576
}
538577

539-
/// Does GC on the next region after `from_key`. Returns the end key of the
540-
/// region it processed. If we have processed to the end of all regions,
541-
/// returns `None`.
542-
fn gc_next_region(
578+
/// Does GC on the next region after `from_key` asynchronously. Returns the
579+
/// end key of the region it processed. If we have processed to the end
580+
/// of all regions, returns `None`.
581+
fn async_gc_next_region(
543582
&mut self,
544583
from_key: Key,
545-
processed_regions: &mut usize,
584+
callback: Callback<()>,
585+
running_tasks: &mut usize,
546586
) -> GcManagerResult<Option<Key>> {
547587
// Get the information of the next region to do GC.
548588
let (region, next_key) = self.get_next_gc_context(from_key);
@@ -552,16 +592,16 @@ impl<S: GcSafePointProvider, R: RegionInfoProvider + 'static, E: KvEngine> GcMan
552592
let hex_end = format!("{:?}", log_wrappers::Value::key(region.get_end_key()));
553593
debug!("trying gc"; "region_id" => region.id, "start_key" => &hex_start, "end_key" => &hex_end);
554594

555-
if let Err(e) = sync_gc(&self.worker_scheduler, region, self.curr_safe_point()) {
556-
// Ignore the error and continue, since it's useless to retry this.
557-
// TODO: Find a better way to handle errors. Maybe we should retry.
558-
warn!("failed gc"; "start_key" => &hex_start, "end_key" => &hex_end, "err" => ?e);
559-
}
560-
561-
*processed_regions += 1;
562-
AUTO_GC_PROCESSED_REGIONS_GAUGE_VEC
563-
.with_label_values(&[PROCESS_TYPE_GC])
564-
.inc();
595+
let _ = schedule_gc(
596+
&self.worker_scheduler,
597+
region,
598+
self.curr_safe_point(),
599+
callback,
600+
)
601+
.map(|_| {
602+
*running_tasks += 1;
603+
Ok::<(), GcManagerError>(())
604+
});
565605

566606
Ok(next_key)
567607
}
@@ -710,8 +750,16 @@ mod tests {
710750
impl GcManagerTestUtil {
711751
pub fn new(regions: BTreeMap<Vec<u8>, RegionInfo>) -> Self {
712752
let (gc_task_sender, gc_task_receiver) = channel();
713-
let worker = WorkerBuilder::new("test-gc-manager").create();
714-
let scheduler = worker.start("gc-manager", MockGcRunner { tx: gc_task_sender });
753+
let worker = WorkerBuilder::new("test-gc-manager")
754+
.thread_count(2)
755+
.create();
756+
let scheduler = worker.start(
757+
"gc-manager",
758+
MockGcRunner {
759+
tx: gc_task_sender.clone(),
760+
},
761+
);
762+
worker.start("gc-manager", MockGcRunner { tx: gc_task_sender });
715763

716764
let (safe_point_sender, safe_point_receiver) = channel();
717765

@@ -731,6 +779,7 @@ mod tests {
731779
scheduler,
732780
GcWorkerConfigManager::default(),
733781
Default::default(),
782+
2,
734783
);
735784
Self {
736785
gc_manager: Some(gc_manager),

0 commit comments

Comments
 (0)