Commit 13feda0

track how much time the flush loop is stalled waiting for uploads (#9885)
## Problem

We don't know how much time the pageserver (PS) loses during ingest while waiting for remote storage uploads in the flush frozen layer loop. We also don't know how many remote storage requests get a permit without waiting, i.e. are not throttled by the remote_storage `concurrency_limit`.

## Summary of changes

- Add a metric that accumulates the time waited, per shard/PS.
- In [remote storage semaphore wait seconds](https://neonprod.grafana.net/d/febd9732-9bcf-4992-a821-49b1f6b02724/remote-storage?orgId=1&var-datasource=HUNg6jvVk&var-instance=pageserver-26.us-east-2.aws.neon.build&var-instance=pageserver-27.us-east-2.aws.neon.build&var-instance=pageserver-28.us-east-2.aws.neon.build&var-instance=pageserver-29.us-east-2.aws.neon.build&var-instance=pageserver-30.us-east-2.aws.neon.build&var-instance=pageserver-31.us-east-2.aws.neon.build&var-instance=pageserver-36.us-east-2.aws.neon.build&var-instance=pageserver-37.us-east-2.aws.neon.build&var-instance=pageserver-38.us-east-2.aws.neon.build&var-instance=pageserver-39.us-east-2.aws.neon.build&var-instance=pageserver-40.us-east-2.aws.neon.build&var-instance=pageserver-41.us-east-2.aws.neon.build&var-request_type=put_object&from=1731961336340&to=1731964762933&viewPanel=3), add a first bucket at 100 microseconds to count requests that do not need to wait on the semaphore at all.

Update: reworked to use a Gauge (one increasing value per PS/shard) instead of a histogram, as suggested in review.
1 parent: 96a1b71

4 files changed: +32 -3 lines

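Taken together, the four diffs below implement one small pattern: record an `Instant` before the flush loop waits for pending uploads, then add the elapsed seconds to a per-timeline, ever-increasing gauge. Here is a minimal, self-contained sketch of that pattern; it uses the upstream `prometheus` and `once_cell` crates directly and a sleep as a stand-in for the upload wait, whereas the real code goes through the pageserver's `metrics` wrapper, `TimelineMetrics`, and `RemoteTimelineClient`; the label values are placeholders.

```rust
use std::time::{Duration, Instant};

use once_cell::sync::Lazy;
use prometheus::{register_gauge_vec, GaugeVec};

// One monotonically increasing value per tenant/shard/timeline: total seconds
// the flush loop has spent stalled waiting for preceding uploads.
static FLUSH_WAIT_UPLOAD_TIME: Lazy<GaugeVec> = Lazy::new(|| {
    register_gauge_vec!(
        "pageserver_flush_wait_upload_seconds",
        "Time spent waiting for preceding uploads during layer flush",
        &["tenant_id", "shard_id", "timeline_id"]
    )
    .expect("failed to define a metric")
});

fn flush_frozen_layer_step() {
    let start = Instant::now();
    // Stand-in for `remote_client.wait_completion().await` in the real flush loop.
    std::thread::sleep(Duration::from_millis(5));
    let waited = start.elapsed().as_secs_f64();

    // Accumulate rather than set: the gauge is a running total of stall time.
    FLUSH_WAIT_UPLOAD_TIME
        .with_label_values(&["example-tenant", "0001", "example-timeline"])
        .add(waited);
}

fn main() {
    flush_frozen_layer_step();
    let total = FLUSH_WAIT_UPLOAD_TIME
        .with_label_values(&["example-tenant", "0001", "example-timeline"])
        .get();
    println!("flush loop stalled for {total:.3}s so far");
}
```

Because the gauge only ever grows, a dashboard can take a rate over it to estimate what fraction of wall-clock time each timeline's flush loop spends stalled on uploads.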

libs/remote_storage/src/metrics.rs

Lines changed: 3 additions & 1 deletion

@@ -176,7 +176,9 @@ pub(crate) struct BucketMetrics {
 
 impl Default for BucketMetrics {
     fn default() -> Self {
-        let buckets = [0.01, 0.10, 0.5, 1.0, 5.0, 10.0, 50.0, 100.0];
+        // first bucket 100 microseconds to count requests that do not need to wait at all
+        // and get a permit immediately
+        let buckets = [0.0001, 0.01, 0.10, 0.5, 1.0, 5.0, 10.0, 50.0, 100.0];
 
         let req_seconds = register_histogram_vec!(
             "remote_storage_s3_request_seconds",

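The reasoning behind the new leading bucket: an uncontended permit acquisition completes in well under 100 microseconds, so observations landing in the `le="0.0001"` bucket can be read as "got a permit without waiting", i.e. not throttled by the remote_storage `concurrency_limit`. A rough sketch of that idea follows; it assumes the upstream `prometheus` and `tokio` crates, the metric name and the standalone semaphore are purely illustrative, and the real bucket list lives in `BucketMetrics::default()` shown above.

```rust
use std::time::Instant;

use prometheus::{Histogram, HistogramOpts};
use tokio::sync::Semaphore;

#[tokio::main]
async fn main() {
    // Same bucket shape as the commit: the leading 100µs bucket separates
    // requests that get a permit immediately from requests that actually queued.
    let wait_seconds = Histogram::with_opts(
        HistogramOpts::new(
            "semaphore_wait_seconds_sketch",
            "Time spent waiting for a concurrency-limit permit (illustrative)",
        )
        .buckets(vec![0.0001, 0.01, 0.10, 0.5, 1.0, 5.0, 10.0, 50.0, 100.0]),
    )
    .expect("failed to define a metric");

    let concurrency_limit = Semaphore::new(100);

    let started = Instant::now();
    // With free permits this returns almost immediately, so the observation
    // falls into the first (le="0.0001") bucket.
    let _permit = concurrency_limit.acquire().await.expect("semaphore closed");
    wait_seconds.observe(started.elapsed().as_secs_f64());
}
```
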
pageserver/src/metrics.rs

Lines changed: 24 additions & 1 deletion

@@ -3,7 +3,7 @@ use metrics::{
     register_counter_vec, register_gauge_vec, register_histogram, register_histogram_vec,
     register_int_counter, register_int_counter_pair_vec, register_int_counter_vec,
     register_int_gauge, register_int_gauge_vec, register_uint_gauge, register_uint_gauge_vec,
-    Counter, CounterVec, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,
+    Counter, CounterVec, Gauge, GaugeVec, Histogram, HistogramVec, IntCounter, IntCounterPair,
     IntCounterPairVec, IntCounterVec, IntGauge, IntGaugeVec, UIntGauge, UIntGaugeVec,
 };
 use once_cell::sync::Lazy;
@@ -457,6 +457,15 @@ pub(crate) static WAIT_LSN_TIME: Lazy<Histogram> = Lazy::new(|| {
     .expect("failed to define a metric")
 });
 
+static FLUSH_WAIT_UPLOAD_TIME: Lazy<GaugeVec> = Lazy::new(|| {
+    register_gauge_vec!(
+        "pageserver_flush_wait_upload_seconds",
+        "Time spent waiting for preceding uploads during layer flush",
+        &["tenant_id", "shard_id", "timeline_id"]
+    )
+    .expect("failed to define a metric")
+});
+
 static LAST_RECORD_LSN: Lazy<IntGaugeVec> = Lazy::new(|| {
     register_int_gauge_vec!(
         "pageserver_last_record_lsn",
@@ -2336,6 +2345,7 @@ pub(crate) struct TimelineMetrics {
     shard_id: String,
     timeline_id: String,
     pub flush_time_histo: StorageTimeMetrics,
+    pub flush_wait_upload_time_gauge: Gauge,
     pub compact_time_histo: StorageTimeMetrics,
     pub create_images_time_histo: StorageTimeMetrics,
     pub logical_size_histo: StorageTimeMetrics,
@@ -2379,6 +2389,9 @@ impl TimelineMetrics {
             &shard_id,
             &timeline_id,
         );
+        let flush_wait_upload_time_gauge = FLUSH_WAIT_UPLOAD_TIME
+            .get_metric_with_label_values(&[&tenant_id, &shard_id, &timeline_id])
+            .unwrap();
         let compact_time_histo = StorageTimeMetrics::new(
             StorageTimeOperation::Compact,
             &tenant_id,
@@ -2516,6 +2529,7 @@ impl TimelineMetrics {
             shard_id,
             timeline_id,
             flush_time_histo,
+            flush_wait_upload_time_gauge,
             compact_time_histo,
             create_images_time_histo,
             logical_size_histo,
@@ -2563,6 +2577,14 @@ impl TimelineMetrics {
         self.resident_physical_size_gauge.get()
     }
 
+    pub(crate) fn flush_wait_upload_time_gauge_add(&self, duration: f64) {
+        self.flush_wait_upload_time_gauge.add(duration);
+        crate::metrics::FLUSH_WAIT_UPLOAD_TIME
+            .get_metric_with_label_values(&[&self.tenant_id, &self.shard_id, &self.timeline_id])
+            .unwrap()
+            .add(duration);
+    }
+
     pub(crate) fn shutdown(&self) {
         let was_shutdown = self
             .shutdown
@@ -2579,6 +2601,7 @@ impl TimelineMetrics {
         let timeline_id = &self.timeline_id;
         let shard_id = &self.shard_id;
         let _ = LAST_RECORD_LSN.remove_label_values(&[tenant_id, shard_id, timeline_id]);
+        let _ = FLUSH_WAIT_UPLOAD_TIME.remove_label_values(&[tenant_id, shard_id, timeline_id]);
         let _ = STANDBY_HORIZON.remove_label_values(&[tenant_id, shard_id, timeline_id]);
         {
             RESIDENT_PHYSICAL_SIZE_GLOBAL.sub(self.resident_physical_size_get());

pageserver/src/tenant/timeline.rs

Lines changed: 4 additions & 1 deletion

@@ -3830,7 +3830,8 @@ impl Timeline {
         };
 
         // Backpressure mechanism: wait with continuation of the flush loop until we have uploaded all layer files.
-        // This makes us refuse ingest until the new layers have been persisted to the remote.
+        // This makes us refuse ingest until the new layers have been persisted to the remote
+        let start = Instant::now();
         self.remote_client
             .wait_completion()
             .await
@@ -3843,6 +3844,8 @@ impl Timeline {
                 FlushLayerError::Other(anyhow!(e).into())
             }
         })?;
+        let duration = start.elapsed().as_secs_f64();
+        self.metrics.flush_wait_upload_time_gauge_add(duration);
 
         // FIXME: between create_delta_layer and the scheduling of the upload in `update_metadata_file`,
         // a compaction can delete the file and then it won't be available for uploads any more.

test_runner/fixtures/metrics.py

Lines changed: 1 addition & 0 deletions

@@ -168,6 +168,7 @@ def counter(name: str) -> str:
     "pageserver_evictions_with_low_residence_duration_total",
     "pageserver_aux_file_estimated_size",
     "pageserver_valid_lsn_lease_count",
+    "pageserver_flush_wait_upload_seconds",
     counter("pageserver_tenant_throttling_count_accounted_start"),
     counter("pageserver_tenant_throttling_count_accounted_finish"),
     counter("pageserver_tenant_throttling_wait_usecs_sum"),
