@@ -2761,8 +2761,19 @@ Note that the measurement does not include the duration for replicating the eval
2761
2761
Measurement : "Operations" ,
2762
2762
Help : "IO operations currently in progress on the store's disk (as reported by the OS)" ,
2763
2763
}
2764
- // The disk rate metrics are computed using data sampled on the interval,
2765
- // COCKROACH_DISK_STATS_POLLING_INTERVAL.
2764
+ // The max disk rate metrics are computed using data sampled at
2765
+ // DefaultDiskStatsPollingInterval, which defaults to 100ms, and scaled up
2766
+ // to be a per-second rate. This is useful to observe short duration spikes
2767
+ // which could result in throttling (and higher observed operation latency),
2768
+ // that are not visible when computing the rate over the counter metrics that
2769
+ // are sampled at the longer DefaultMetricsSampleInterval (10s).
2770
+ //
2771
+ // The expected usage is when a latency histogram, such as the fsync latency
2772
+ // or disk read latency, shows high tail latency, while the normal rate
2773
+ // metrics show disk bandwidth and IOPS lower than the provisioned values.
2774
+ // If these max rate metrics show usage close to the provisioned value, one
2775
+ // can blame the high usage for the higher latency, and not blame it on
2776
+ // unrelated slowness in the disk infrastructure.
2766
2777
metaDiskReadMaxBytesPerSecond = metric.Metadata {
2767
2778
Name : "storage.disk.read-max.bytespersecond" ,
2768
2779
Unit : metric .Unit_BYTES ,
@@ -2775,6 +2786,18 @@ Note that the measurement does not include the duration for replicating the eval
2775
2786
Measurement : "Bytes" ,
2776
2787
Help : "Maximum rate at which bytes were written to disk (as reported by the OS)" ,
2777
2788
}
2789
+ metaDiskReadMaxIOPS = metric.Metadata {
2790
+ Name : "storage.disk.read-max.iops" ,
2791
+ Unit : metric .Unit_COUNT ,
2792
+ Measurement : "Operations" ,
2793
+ Help : "Maximum rate of read operations performed on the disk (as reported by the OS)" ,
2794
+ }
2795
+ metaDiskWriteMaxIOPS = metric.Metadata {
2796
+ Name : "storage.disk.write-max.iops" ,
2797
+ Unit : metric .Unit_COUNT ,
2798
+ Measurement : "Operations" ,
2799
+ Help : "Maximum rate of write operations performed on the disk (as reported by the OS)" ,
2800
+ }
2778
2801
)
2779
2802
2780
2803
// StoreMetrics is the set of metrics for a given store.
@@ -3212,6 +3235,8 @@ type StoreMetrics struct {
3212
3235
DiskIopsInProgress * metric.Gauge
3213
3236
DiskReadMaxBytesPerSecond * metric.Gauge
3214
3237
DiskWriteMaxBytesPerSecond * metric.Gauge
3238
+ DiskReadMaxIOPS * metric.Gauge
3239
+ DiskWriteMaxIOPS * metric.Gauge
3215
3240
}
3216
3241
3217
3242
// TenantsStorageMetrics are metrics which are aggregated over all tenants
@@ -3990,6 +4015,8 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics {
3990
4015
DiskIopsInProgress : metric .NewGauge (metaDiskIopsInProgress ),
3991
4016
DiskReadMaxBytesPerSecond : metric .NewGauge (metaDiskReadMaxBytesPerSecond ),
3992
4017
DiskWriteMaxBytesPerSecond : metric .NewGauge (metaDiskWriteMaxBytesPerSecond ),
4018
+ DiskReadMaxIOPS : metric .NewGauge (metaDiskReadMaxIOPS ),
4019
+ DiskWriteMaxIOPS : metric .NewGauge (metaDiskWriteMaxIOPS ),
3993
4020
3994
4021
// Estimated MVCC stats in split.
3995
4022
SplitsWithEstimatedStats : metric .NewCounter (metaSplitEstimatedStats ),
@@ -4262,11 +4289,14 @@ func (sm *StoreMetrics) updateDiskStats(
4262
4289
log .Errorf (ctx , "not updating cumulative stats due to %s" , cumulativeStatsErr )
4263
4290
}
4264
4291
maxRollingStats := rollingStats .Max ()
4265
- // maxRollingStats is computed as the change in stats every 100ms, so we
4266
- // scale them to represent the change in stats every 1s.
4292
+ // maxRollingStats is computed as the change in stats every 100ms
4293
+ // (DefaultDiskStatsPollingInterval), so we scale them to represent the
4294
+ // change in stats every 1s.
4267
4295
perSecondMultiplier := int (time .Second / disk .DefaultDiskStatsPollingInterval )
4268
4296
sm .DiskReadMaxBytesPerSecond .Update (int64 (maxRollingStats .BytesRead () * perSecondMultiplier ))
4269
4297
sm .DiskWriteMaxBytesPerSecond .Update (int64 (maxRollingStats .BytesWritten () * perSecondMultiplier ))
4298
+ sm .DiskReadMaxIOPS .Update (int64 (maxRollingStats .ReadsCount * perSecondMultiplier ))
4299
+ sm .DiskWriteMaxIOPS .Update (int64 (maxRollingStats .WritesCount * perSecondMultiplier ))
4270
4300
}
4271
4301
4272
4302
func (sm * StoreMetrics ) handleMetricsResult (ctx context.Context , metric result.Metrics ) {
0 commit comments