Skip to content

Commit 7d2926a

Browse files
committed
compute,storage: record wallclock lag histograms
This commit extends the two controllers' `refresh_wallclock_lag` methods to also record `WallclockLagHistogram` introspection data. Both the histogram refresh interval (i.e., the cadence of persist writes) and the histogram period length can be configured through dyncfg flags.
1 parent 3f34667 commit 7d2926a

File tree

7 files changed

+209
-30
lines changed

7 files changed

+209
-30
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/compute-client/src/controller.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1081,7 +1081,7 @@ where
10811081
let op = StorageWriteOp::Append { updates };
10821082
storage.update_introspection_collection(type_, op);
10831083
}
1084-
WallclockLagHistory => {
1084+
WallclockLagHistory | WallclockLagHistogram => {
10851085
storage.append_introspection_updates(type_, updates);
10861086
}
10871087
_ => panic!("unexpected introspection type: {type_:?}"),

src/compute-client/src/controller/instance.rs

Lines changed: 72 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@ use mz_compute_types::sinks::{
2626
};
2727
use mz_compute_types::sources::SourceInstanceDesc;
2828
use mz_compute_types::ComputeInstanceId;
29-
use mz_controller_types::dyncfgs::WALLCLOCK_LAG_HISTORY_REFRESH_INTERVAL;
29+
use mz_controller_types::dyncfgs::{
30+
WALLCLOCK_LAG_HISTOGRAM_REFRESH_INTERVAL, WALLCLOCK_LAG_HISTORY_REFRESH_INTERVAL,
31+
};
3032
use mz_dyncfg::ConfigSet;
3133
use mz_expr::RowSetFinishing;
3234
use mz_ore::cast::CastFrom;
@@ -35,9 +37,10 @@ use mz_ore::now::NowFn;
3537
use mz_ore::tracing::OpenTelemetryContext;
3638
use mz_ore::{soft_assert_or_log, soft_panic_or_log};
3739
use mz_repr::adt::interval::Interval;
40+
use mz_repr::adt::timestamp::CheckedTimestamp;
3841
use mz_repr::refresh_schedule::RefreshSchedule;
39-
use mz_repr::{Datum, Diff, GlobalId, Row};
40-
use mz_storage_client::controller::IntrospectionType;
42+
use mz_repr::{Datum, DatumMap, Diff, GlobalId, Row};
43+
use mz_storage_client::controller::{IntrospectionType, WallclockLagHistogramPeriod};
4144
use mz_storage_types::read_holds::{self, ReadHold};
4245
use mz_storage_types::read_policy::ReadPolicy;
4346
use serde::Serialize;
@@ -291,6 +294,8 @@ pub(super) struct Instance<T: ComputeControllerTimestamp> {
291294
wallclock_lag: WallclockLagFn<T>,
292295
/// The last time `WallclockLagHistory` introspection was refreshed.
293296
wallclock_lag_history_last_refresh: Instant,
297+
/// The last time `WallclockLagHistogram` introspection was refreshed.
298+
wallclock_lag_histogram_last_refresh: Instant,
294299

295300
/// Sender for updates to collection read holds.
296301
///
@@ -555,17 +560,20 @@ impl<T: ComputeControllerTimestamp> Instance<T> {
555560
let refresh_history = !self.read_only
556561
&& self.wallclock_lag_history_last_refresh.elapsed()
557562
>= WALLCLOCK_LAG_HISTORY_REFRESH_INTERVAL.get(&self.dyncfg);
563+
let refresh_histogram = !self.read_only
564+
&& self.wallclock_lag_histogram_last_refresh.elapsed()
565+
>= WALLCLOCK_LAG_HISTOGRAM_REFRESH_INTERVAL.get(&self.dyncfg);
558566

559-
let mut history_updates = Vec::new();
560-
561-
let now = mz_ore::now::to_datetime((self.now)());
562-
let now_tz = now.try_into().expect("must fit");
567+
let now_ms = (self.now)();
568+
let now_dt = mz_ore::now::to_datetime(now_ms);
569+
let now_ts: CheckedTimestamp<_> = now_dt.try_into().expect("must fit");
563570

564571
let frontier_lag = |frontier: &Antichain<_>| match frontier.as_option() {
565572
Some(ts) => (self.wallclock_lag)(ts),
566573
None => Duration::ZERO,
567574
};
568575

576+
let mut history_updates = Vec::new();
569577
for (replica_id, replica) in &mut self.replicas {
570578
for (collection_id, collection) in &mut replica.collections {
571579
let lag = frontier_lag(&collection.write_frontier);
@@ -580,7 +588,7 @@ impl<T: ComputeControllerTimestamp> Instance<T> {
580588
Datum::String(&collection_id.to_string()),
581589
Datum::String(&replica_id.to_string()),
582590
Datum::Interval(Interval::new(0, 0, max_lag_us)),
583-
Datum::TimestampTz(now_tz),
591+
Datum::TimestampTz(now_ts),
584592
]);
585593
history_updates.push((row, 1));
586594
}
@@ -591,14 +599,46 @@ impl<T: ComputeControllerTimestamp> Instance<T> {
591599
};
592600
}
593601
}
594-
595602
if !history_updates.is_empty() {
596603
self.deliver_introspection_updates(
597604
IntrospectionType::WallclockLagHistory,
598605
history_updates,
599606
);
600607
self.wallclock_lag_history_last_refresh = Instant::now();
601608
}
609+
610+
let histogram_period = WallclockLagHistogramPeriod::from_epoch_millis(now_ms, &self.dyncfg);
611+
612+
let mut histogram_updates = Vec::new();
613+
for (collection_id, collection) in &mut self.collections {
614+
if let Some(stash) = &mut collection.wallclock_lag_histogram_stash {
615+
let lag = collection.shared.lock_write_frontier(|f| frontier_lag(f));
616+
let bucket = lag.as_secs().next_power_of_two();
617+
618+
let key = (histogram_period, bucket);
619+
*stash.entry(key).or_default() += 1;
620+
621+
if refresh_histogram {
622+
for ((period, lag), count) in std::mem::take(stash) {
623+
let row = Row::pack_slice(&[
624+
Datum::String(&collection_id.to_string()),
625+
Datum::UInt64(lag),
626+
Datum::TimestampTz(period.start),
627+
Datum::TimestampTz(period.end),
628+
Datum::Map(DatumMap::empty()),
629+
]);
630+
histogram_updates.push((row, count));
631+
}
632+
}
633+
}
634+
}
635+
if !histogram_updates.is_empty() {
636+
self.deliver_introspection_updates(
637+
IntrospectionType::WallclockLagHistogram,
638+
histogram_updates,
639+
);
640+
self.wallclock_lag_histogram_last_refresh = Instant::now();
641+
}
602642
}
603643

604644
/// Report updates (inserts or retractions) to the identified collection's dependencies.
@@ -819,6 +859,7 @@ impl<T: ComputeControllerTimestamp> Instance<T> {
819859
now: _,
820860
wallclock_lag: _,
821861
wallclock_lag_history_last_refresh,
862+
wallclock_lag_histogram_last_refresh,
822863
read_hold_tx: _,
823864
replica_tx: _,
824865
replica_rx: _,
@@ -854,6 +895,8 @@ impl<T: ComputeControllerTimestamp> Instance<T> {
854895
.map(|(id, epoch)| (id.to_string(), epoch))
855896
.collect();
856897
let wallclock_lag_history_last_refresh = format!("{wallclock_lag_history_last_refresh:?}");
898+
let wallclock_lag_histogram_last_refresh =
899+
format!("{wallclock_lag_histogram_last_refresh:?}");
857900

858901
let map = serde_json::Map::from_iter([
859902
field("initialized", initialized)?,
@@ -869,6 +912,10 @@ impl<T: ComputeControllerTimestamp> Instance<T> {
869912
"wallclock_lag_history_last_refresh",
870913
wallclock_lag_history_last_refresh,
871914
)?,
915+
field(
916+
"wallclock_lag_histogram_last_refresh",
917+
wallclock_lag_histogram_last_refresh,
918+
)?,
872919
]);
873920
Ok(serde_json::Value::Object(map))
874921
}
@@ -934,6 +981,7 @@ where
934981
now,
935982
wallclock_lag,
936983
wallclock_lag_history_last_refresh: Instant::now(),
984+
wallclock_lag_histogram_last_refresh: Instant::now(),
937985
read_hold_tx,
938986
replica_tx,
939987
replica_rx,
@@ -2155,6 +2203,12 @@ struct CollectionState<T: ComputeControllerTimestamp> {
21552203

21562204
/// Introspection state associated with this collection.
21572205
introspection: CollectionIntrospection<T>,
2206+
2207+
/// Frontier wallclock lag measurements stashed until the next `WallclockLagHistogram`
2208+
/// introspection update.
2209+
///
2210+
/// If this is `None`, wallclock lag is not tracked for this collection.
2211+
wallclock_lag_histogram_stash: Option<BTreeMap<(WallclockLagHistogramPeriod, u64), i64>>,
21582212
}
21592213

21602214
impl<T: ComputeControllerTimestamp> CollectionState<T> {
@@ -2189,6 +2243,14 @@ impl<T: ComputeControllerTimestamp> CollectionState<T> {
21892243
c.update_iter(updates);
21902244
});
21912245

2246+
// In an effort to keep the produced wallclock lag introspection data small and
2247+
// predictable, we disable wallclock lag tracking for transient collections, i.e. slow-path
2248+
// select indexes and subscribes.
2249+
let wallclock_lag_histogram_stash = match collection_id.is_transient() {
2250+
true => None,
2251+
false => Some(Default::default()),
2252+
};
2253+
21922254
Self {
21932255
log_collection: false,
21942256
dropped: false,
@@ -2200,6 +2262,7 @@ impl<T: ComputeControllerTimestamp> CollectionState<T> {
22002262
storage_dependencies,
22012263
compute_dependencies,
22022264
introspection,
2265+
wallclock_lag_histogram_stash,
22032266
}
22042267
}
22052268

src/controller-types/src/dyncfgs.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,18 @@ pub const WALLCLOCK_LAG_HISTORY_REFRESH_INTERVAL: Config<Duration> = Config::new
3232
"The interval at which to refresh `WallclockLagHistory` introspection.",
3333
);
3434

35+
pub const WALLCLOCK_LAG_HISTOGRAM_REFRESH_INTERVAL: Config<Duration> = Config::new(
36+
"wallclock_lag_histogram_refresh_interval",
37+
Duration::from_secs(60),
38+
"The interval at which to refresh `WallclockLagHistogram` introspection.",
39+
);
40+
41+
pub const WALLCLOCK_LAG_HISTOGRAM_PERIOD_LENGTH: Config<Duration> = Config::new(
42+
"wallclock_lag_histogram_period_length",
43+
Duration::from_secs(24 * 60 * 60),
44+
"The period length of histograms in `WallclockLagHistogram` introspection.",
45+
);
46+
3547
pub const ENABLE_TIMELY_ZERO_COPY: Config<bool> = Config::new(
3648
"enable_timely_zero_copy",
3749
false,
@@ -56,6 +68,8 @@ pub fn all_dyncfgs(configs: ConfigSet) -> ConfigSet {
5668
.add(&CONTROLLER_PAST_GENERATION_REPLICA_CLEANUP_RETRY_INTERVAL)
5769
.add(&ENABLE_0DT_DEPLOYMENT_SOURCES)
5870
.add(&WALLCLOCK_LAG_HISTORY_REFRESH_INTERVAL)
71+
.add(&WALLCLOCK_LAG_HISTOGRAM_REFRESH_INTERVAL)
72+
.add(&WALLCLOCK_LAG_HISTOGRAM_PERIOD_LENGTH)
5973
.add(&ENABLE_TIMELY_ZERO_COPY)
6074
.add(&ENABLE_TIMELY_ZERO_COPY_LGALLOC)
6175
.add(&TIMELY_ZERO_COPY_LIMIT)

src/storage-client/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ itertools = { version = "0.12.1" }
2020
maplit = "1.0.2"
2121
mz-ccsr = { path = "../ccsr" }
2222
mz-cluster-client = { path = "../cluster-client" }
23+
mz-controller-types = { path = "../controller-types" }
24+
mz-dyncfg = { path = "../dyncfg" }
2325
mz-dyncfgs = { path = "../dyncfgs" }
2426
mz-kafka-util = { path = "../kafka-util" }
2527
mz-ore = { path = "../ore", features = ["async", "process", "tracing"] }

src/storage-client/src/controller.rs

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,16 @@ use std::sync::Arc;
2727
use std::time::Duration;
2828

2929
use async_trait::async_trait;
30+
use chrono::{DateTime, Utc};
3031
use differential_dataflow::lattice::Lattice;
3132
use mz_cluster_client::client::ClusterReplicaLocation;
3233
use mz_cluster_client::ReplicaId;
34+
use mz_controller_types::dyncfgs::WALLCLOCK_LAG_HISTOGRAM_PERIOD_LENGTH;
35+
use mz_dyncfg::ConfigSet;
36+
use mz_ore::soft_panic_or_log;
3337
use mz_persist_client::batch::ProtoBatch;
3438
use mz_persist_types::{Codec64, Opaque, ShardId};
39+
use mz_repr::adt::timestamp::CheckedTimestamp;
3540
use mz_repr::{Diff, GlobalId, RelationDesc, RelationVersion, Row};
3641
use mz_storage_types::configuration::StorageConfiguration;
3742
use mz_storage_types::connections::inline::InlinedConnection;
@@ -842,6 +847,36 @@ impl<T> MonotonicAppender<T> {
842847
}
843848
}
844849

850+
/// The period covered by a wallclock lag histogram, represented as a `[start, end)` range.
851+
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
852+
pub struct WallclockLagHistogramPeriod {
853+
pub start: CheckedTimestamp<DateTime<Utc>>,
854+
pub end: CheckedTimestamp<DateTime<Utc>>,
855+
}
856+
857+
impl WallclockLagHistogramPeriod {
858+
/// Construct a `WallclockLagHistogramPeriod` from the given epoch timestamp and dyncfg.
859+
pub fn from_epoch_millis(epoch_ms: u64, dyncfg: &ConfigSet) -> Self {
860+
let length = WALLCLOCK_LAG_HISTOGRAM_PERIOD_LENGTH.get(dyncfg);
861+
let length_ms = u64::try_from(length.as_millis()).unwrap_or_else(|_| {
862+
soft_panic_or_log!("excessive wallclock lag histogram period length: {length:?}");
863+
let default = WALLCLOCK_LAG_HISTOGRAM_PERIOD_LENGTH.default();
864+
u64::try_from(default.as_millis()).unwrap()
865+
});
866+
let length_ms = std::cmp::max(length_ms, 1);
867+
868+
let start_ms = epoch_ms - (epoch_ms % length_ms);
869+
let start_dt = mz_ore::now::to_datetime(start_ms);
870+
let start = start_dt.try_into().expect("must fit");
871+
872+
let end_ms = start_ms + length_ms;
873+
let end_dt = mz_ore::now::to_datetime(end_ms);
874+
let end = end_dt.try_into().expect("must fit");
875+
876+
Self { start, end }
877+
}
878+
}
879+
845880
#[cfg(test)]
846881
mod tests {
847882
use super::*;

0 commit comments

Comments (0)