Skip to content

Commit 2edc9a3

Browse files
committed
Adding jitter to the metadata threads to avoid workers simultaneously requesting metadata
1 parent fe41ace commit 2edc9a3

File tree

3 files changed

+9
-7
lines changed

3 files changed

+9
-7
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/dataflow/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ ore = { path = "../ore" }
2929
pdqselect = "0.1.0"
3030
prometheus = { git = "https://github.com/MaterializeInc/rust-prometheus.git", default-features = false }
3131
prometheus-static-metric = { git = "https://github.com/MaterializeInc/rust-prometheus.git" }
32+
rand = "0.7.3"
3233
rdkafka = { git = "https://github.com/fede1024/rust-rdkafka.git", features = ["cmake-build", "ssl-vendored", "gssapi-vendored", "libz-static"] }
3334
regex = "1.3.9"
3435
repr = { path = "../repr" }

src/dataflow/src/source/kafka.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
// the Business Source License, use of this software will be governed
88
// by the Apache License, Version 2.0.
99

10+
use rand::Rng;
1011
use std::collections::{HashMap, HashSet, VecDeque};
1112
use std::convert::TryInto;
1213
use std::sync::atomic::{AtomicBool, Ordering};
@@ -667,8 +668,6 @@ struct ControlPlaneInfo {
667668
start_offset: MzOffset,
668669
/// Source Type (Real-time or BYO)
669670
source_type: Consistency,
670-
/// Number of records processed since capability was last downgraded
671-
record_count_since_downgrade: u64,
672671
}
673672

674673
impl ControlPlaneInfo {
@@ -685,7 +684,6 @@ impl ControlPlaneInfo {
685684
start_offset,
686685
source_type: consistency,
687686
time_since_downgrade: Instant::now(),
688-
record_count_since_downgrade: 0,
689687
}
690688
}
691689

@@ -765,6 +763,7 @@ fn activate_source_timestamping<G>(
765763

766764
/// This function is responsible for refreshing the number of known partitions. It marks the source
767765
as needing to be refreshed if new partitions are detected.
766+
#[allow(clippy::too_many_arguments)]
768767
fn metadata_fetch(
769768
timestamping_stopped: Arc<AtomicBool>,
770769
consumer: Arc<BaseConsumer<GlueConsumerContext>>,
@@ -790,6 +789,7 @@ fn metadata_fetch(
790789
}
791790

792791
let mut partition_kafka_metadata: HashMap<i32, IntGauge> = HashMap::new();
792+
let mut rng = rand::thread_rng();
793793

794794
while !timestamping_stopped.load(Ordering::SeqCst) {
795795
let metadata = consumer.fetch_metadata(Some(&topic), Duration::from_secs(30));
@@ -855,7 +855,10 @@ fn metadata_fetch(
855855
}
856856

857857
if new_partition_count > 0 {
858-
thread::sleep(wait);
858+
// Add jitter to spread out metadata requests from workers. Brokers can get overloaded
859+
if all workers issue metadata requests at the same time.
860+
let sleep_jitter = rng.gen_range(Duration::from_secs(0), Duration::from_secs(15));
861+
thread::sleep(wait + sleep_jitter);
859862
} else {
860863
// If no partitions have been detected yet, sleep for a second rather than
861864
// the specified "wait" period of time, as we know that there should at least be one
@@ -1039,7 +1042,6 @@ where
10391042
&mut dp_info.partition_metrics.get_mut(&partition).unwrap();
10401043
partition_metrics.offset_ingested.set(offset.offset);
10411044
partition_metrics.messages_ingested.inc();
1042-
cp_info.record_count_since_downgrade += 1;
10431045
}
10441046
}
10451047

@@ -1271,7 +1273,6 @@ fn downgrade_capability(
12711273
if changed && min > 0 {
12721274
dp_info.source_metrics.capability.set(min);
12731275
cap.downgrade(&(&min + 1));
1274-
cp_info.record_count_since_downgrade = 0;
12751276
cp_info.last_closed_ts = min;
12761277
}
12771278
} else {
@@ -1286,7 +1287,6 @@ fn downgrade_capability(
12861287
cap.downgrade(&(&ts + 1));
12871288
}
12881289
cp_info.time_since_downgrade = Instant::now();
1289-
cp_info.record_count_since_downgrade = 0;
12901290
}
12911291
}
12921292
}

0 commit comments

Comments
 (0)