
Commit c81963c

craig[bot], spilchen, and aa-joshi committed
155144: roachtest: add benchmark for INSPECT under admission control r=spilchen a=spilchen

Add a new roachtest to measure the impact of admission control on INSPECT operations running concurrently with foreground workloads. As part of this, introduce a cluster setting knob to control admission control behavior for INSPECT operations, allowing them to run with either BulkLowQoS (enabled) or Normal QoS (disabled).

The test performs three INSPECT runs:

1. A calibration run (with no workload) to dynamically determine the test duration.
2. An INSPECT run with admission control enabled (BulkLowQoS) during a foreground read workload.
3. An INSPECT run with admission control disabled (Normal QoS) during the same workload.

Each run measures and reports throughput (rows/s/CPU), enabling us to quantify:

- The performance impact of admission control on INSPECT operations.
- The effectiveness of admission control in protecting foreground workload latency.

The workload duration is dynamically calculated to ensure it is long enough to encompass both INSPECT runs.

Informs: #154457
Epic: CRDB-30356
Release note: none

155148: cli(tsdump): upsample counter metric of 30 min interval during Datadog upload r=aa-joshi a=aa-joshi

Previously, we uploaded counter metric types to Datadog without considering the metric interval. Datadog honours the interval that is set on the metric series; here, the value is set to `10` seconds to match the default scrape interval. This resulted in improper visualisation of metrics that are generated at a 30-minute interval. To address this, this patch upsamples counter metric types with a 30-minute interval to a 10-second interval. This means we add 180 data points (30 min / 10 s = 180) so that the counter metrics can be visualised properly irrespective of the metric interval.

Epic: none
Part of: CRDB-54228
Release note: None

Co-authored-by: Matt Spilchen <[email protected]>
Co-authored-by: Akshay Joshi <[email protected]>
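As a rough illustration of the rows/s/CPU metric described in 155144, here is a minimal Go sketch of that normalization. The helper name, its inputs, and the example numbers are hypothetical; the actual roachtest code (and the new cluster-setting name) is not shown in this diff.

package main

import (
	"fmt"
	"time"
)

// inspectThroughput is a hypothetical helper, not the roachtest's actual code:
// it normalizes INSPECT throughput by run duration and total vCPUs so that runs
// on differently sized clusters can be compared as rows/s/CPU.
func inspectThroughput(rowsChecked int64, elapsed time.Duration, totalVCPUs int) float64 {
	return float64(rowsChecked) / elapsed.Seconds() / float64(totalVCPUs)
}

func main() {
	// Example: 90M rows inspected in 30 minutes on a cluster with 24 vCPUs in total.
	fmt.Printf("%.1f rows/s/CPU\n", inspectThroughput(90_000_000, 30*time.Minute, 24))
}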
3 parents 4cf9616 + 6aa1028 + 30f21e6 commit c81963c

File tree

11 files changed: +446 -7 lines changed


pkg/cli/tsdump.go

Lines changed: 2 additions & 2 deletions
@@ -76,9 +76,9 @@ var debugTimeSeriesDumpOpts = struct {
 var hostNameOverride string
 
 // datadogSeriesThreshold holds the threshold for the number of series
-// that will be uploaded to Datadog in a single request. We have capped it to 100
+// that will be uploaded to Datadog in a single request. We have capped it to 50
 // to avoid hitting the Datadog API limits.
-var datadogSeriesThreshold = 100
+var datadogSeriesThreshold = 50
 
 const uploadWorkerErrorMessage = "--upload-workers is set to an invalid value." +
 	" please select a value which between 1 and 100."

pkg/cli/tsdump_upload.go

Lines changed: 74 additions & 0 deletions
@@ -86,6 +86,72 @@ type FailedRequestsFile struct {
 	Requests []FailedRequest `json:"requests"`
 }
 
+// GapFillProcessor interpolates 30-minute resolution counter metrics to 10-second resolution
+// by filling gaps with zero values while preserving the original data points.
+type GapFillProcessor struct{}
+
+func NewGapFillProcessor() *GapFillProcessor {
+	return &GapFillProcessor{}
+}
+
+// processCounterMetric interpolates 30-minute resolution counter metrics to 10-second resolution.
+// It checks whether the metric is a counter type with a 30-minute interval (1800 seconds) and converts
+// it to 10-second resolution by filling gaps with zero values between the original data points.
+func (gfp *GapFillProcessor) processCounterMetric(series *datadogV2.MetricSeries) error {
+	// Only process counter metrics.
+	if series.Type == nil || *series.Type != datadogV2.METRICINTAKETYPE_COUNT {
+		return nil
+	}
+
+	// Only process 30-minute resolution metrics (1800 seconds).
+	if series.Interval == nil || *series.Interval != 1800 {
+		return nil
+	}
+
+	// If there are no points or only one point, there is nothing to interpolate.
+	if len(series.Points) <= 1 {
+		// Still update the interval to 10 seconds for consistency.
+		series.Interval = datadog.PtrInt64(10)
+		return nil
+	}
+
+	// Create a new points array with the interpolated values.
+	var newPoints []datadogV2.MetricPoint
+
+	for i := 0; i < len(series.Points); i++ {
+		currentValue := *series.Points[i].Value
+		currentTimestamp := *series.Points[i].Timestamp
+
+		// Distribute the delta value across 180 points (1800s / 10s = 180).
+		distributedValue := currentValue / 180.0
+
+		newPoints = append(newPoints, datadogV2.MetricPoint{
+			Timestamp: datadog.PtrInt64(currentTimestamp),
+			Value:     datadog.PtrFloat64(distributedValue),
+		})
+
+		// Add 179 zero points (at 10-second intervals) between the current point and the next.
+		for j := 1; j < 180; j++ {
+			// We add a delta of 0 so that the same distributed value is published
+			// across all points. This lets us perform 10-second rollups
+			// for metrics with 30-minute intervals:
+			// metric value = (metric value / 180) * 180
+			interpolatedTimestamp := currentTimestamp + int64(j*10)
+			newPoints = append(newPoints, datadogV2.MetricPoint{
+				Timestamp: datadog.PtrInt64(interpolatedTimestamp),
+				Value:     datadog.PtrFloat64(0.0),
+			})
+		}
+	}
+
+	// Update the series with the new points and a 10-second interval.
+	series.Points = newPoints
+	series.Interval = datadog.PtrInt64(10)
+
+	return nil
+}
+
 var newTsdumpUploadID = func(uploadTime time.Time) string {
 	clusterTagValue := "cluster-debug"
 	if debugTimeSeriesDumpOpts.clusterLabel != "" {
@@ -122,6 +188,8 @@ type datadogWriter struct {
 	hasFailedRequestsInUpload bool
 	// cumulativeToDeltaProcessor is used to convert cumulative counter metrics to delta metrics
 	cumulativeToDeltaProcessor *CumulativeToDeltaProcessor
+	// gapFillProcessor is used to interpolate 30-minute resolution counter metrics to 10-second resolution
+	gapFillProcessor *GapFillProcessor
 }
 
 func makeDatadogWriter(
@@ -185,6 +253,7 @@ func makeDatadogWriter(
 		noOfUploadWorkers:               noOfUploadWorkers,
 		isPartialUploadOfFailedRequests: isPartialUploadOfFailedRequests,
 		cumulativeToDeltaProcessor:      NewCumulativeToDeltaProcessor(),
+		gapFillProcessor:                NewGapFillProcessor(),
 	}, nil
 }
 
@@ -283,6 +352,11 @@ func (d *datadogWriter) dump(kv *roachpb.KeyValue) (*datadogV2.MetricSeries, err
 		}
 	}
 
+	// Process gap-filling for 30-minute resolution counter metrics.
+	if err := d.gapFillProcessor.processCounterMetric(series); err != nil {
+		return nil, err
+	}
+
 	return series, nil
 }
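To make the gap-filling arithmetic above concrete, the following standalone sketch mirrors what processCounterMetric does on plain timestamp/value pairs (a simplified stand-in for datadogV2.MetricPoint, not the production code path): each 30-minute counter sample becomes 180 ten-second samples, with value/180 at the original timestamp followed by 179 zero points.

package main

import "fmt"

// point is a simplified stand-in for datadogV2.MetricPoint.
type point struct {
	ts  int64   // Unix timestamp in seconds
	val float64 // counter delta for the interval
}

// upsample30mTo10s mirrors the arithmetic of GapFillProcessor.processCounterMetric:
// every 30-minute sample is replaced by 180 ten-second samples, with value/180 at
// the original timestamp and zeros at the 179 gap-filled timestamps.
func upsample30mTo10s(points []point) []point {
	var out []point
	for _, p := range points {
		out = append(out, point{ts: p.ts, val: p.val / 180.0})
		for j := 1; j < 180; j++ {
			out = append(out, point{ts: p.ts + int64(j*10), val: 0})
		}
	}
	return out
}

func main() {
	in := []point{{ts: 1000, val: 50}, {ts: 2800, val: 55}}
	out := upsample30mTo10s(in)
	// Prints 360 points in total; out[0]={1000 50/180}, out[1]={1010 0}, out[180]={2800 55/180}.
	fmt.Println(len(out), out[0], out[1], out[180])
}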

pkg/cli/tsdump_upload_test.go

Lines changed: 65 additions & 0 deletions
@@ -830,3 +830,68 @@ func TestDatadogInit(t *testing.T) {
 		require.Contains(t, receivedMetrics, expectedMetric, "expected metric %s should be present", expectedMetric)
 	}
 }
+
+// TestGapFillProcessor30MinTo10s tests the gap-filling functionality for 30-minute counter metrics.
+// It verifies that 30-minute resolution counter metrics are correctly interpolated to 10-second resolution
+// with zero-filled gaps between the original data points.
+func TestGapFillProcessor30MinTo10s(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+	defer log.Scope(t).Close(t)
+
+	processor := NewGapFillProcessor()
+
+	// Create a mock 30-minute resolution counter metric with 2 data points.
+	series := &datadogV2.MetricSeries{
+		Metric:   "cr.node.test-counter-count",
+		Tags:     []string{"node_id:1"},
+		Type:     datadogV2.METRICINTAKETYPE_COUNT.Ptr(),
+		Interval: datadog.PtrInt64(1800), // 30 minutes in seconds
+		Points: []datadogV2.MetricPoint{
+			{
+				Timestamp: datadog.PtrInt64(1000), // t=1000
+				Value:     datadog.PtrFloat64(50), // first 30-min point
+			},
+			{
+				Timestamp: datadog.PtrInt64(2800), // t=2800 (1800 seconds later)
+				Value:     datadog.PtrFloat64(55), // second 30-min point
+			},
+		},
+	}
+
+	// Process the series.
+	err := processor.processCounterMetric(series)
+	require.NoError(t, err)
+
+	// Verify the interval was updated to 10 seconds.
+	require.NotNil(t, series.Interval)
+	require.Equal(t, int64(10), *series.Interval)
+
+	// Verify we have the correct number of points.
+	// Each of the 2 original points is expanded into 180 data points,
+	// so the expected number of points is 2 * 180 = 360.
+	expectedPoints := 360
+	require.Len(t, series.Points, expectedPoints)
+
+	// Verify the first original point is preserved, with its value distributed across 180 points.
+	require.Equal(t, int64(1000), *series.Points[0].Timestamp)
+	require.Equal(t, 50.0/180, *series.Points[0].Value)
+
+	// Verify the first and last interpolated points for the first data point
+	// are zeros with the correct timestamps.
+	require.Equal(t, int64(1010), *series.Points[1].Timestamp)
+	require.Equal(t, 0.0, *series.Points[1].Value)
+
+	require.Equal(t, int64(2790), *series.Points[179].Timestamp)
+	require.Equal(t, 0.0, *series.Points[179].Value)
+
+	// Verify the second original point is preserved at index 180.
+	index := 180
+	require.Equal(t, int64(2800), *series.Points[index].Timestamp)
+	require.Equal(t, 55.0/180, *series.Points[index].Value)
+
+	// Verify the last (gap-filled) point has a zero value and the correct timestamp.
+	lastIndex := len(series.Points) - 1
+	require.Equal(t, int64(4590), *series.Points[lastIndex].Timestamp)
+	require.Equal(t, 0.0, *series.Points[lastIndex].Value)
+}

pkg/cmd/roachtest/tests/BUILD.bazel

Lines changed: 1 addition & 0 deletions
@@ -18,6 +18,7 @@ go_library(
         "admission_control_follower_overload.go",
         "admission_control_index_backfill.go",
         "admission_control_index_overload.go",
+        "admission_control_inspect.go",
         "admission_control_intent_resolution.go",
         "admission_control_multi_store_index_backfill.go",
         "admission_control_multi_store_overload.go",
