@@ -8,90 +8,87 @@ package structlogging_test
import (
	"context"
	"encoding/json"
-	"errors"
	"fmt"
	"regexp"
	"testing"
	"time"

-	"github.com/cockroachdb/cockroach/pkg/base"
-	"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
-	"github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator/plan"
-	"github.com/cockroachdb/cockroach/pkg/roachpb"
+	"github.com/cockroachdb/cockroach/pkg/server/serverpb"
	"github.com/cockroachdb/cockroach/pkg/server/structlogging"
	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
	"github.com/cockroachdb/cockroach/pkg/testutils"
-	"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
	"github.com/cockroachdb/cockroach/pkg/testutils/skip"
	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
	"github.com/cockroachdb/cockroach/pkg/util/log"
	"github.com/cockroachdb/cockroach/pkg/util/log/eventpb"
	"github.com/cockroachdb/cockroach/pkg/util/log/logpb"
+	"github.com/cockroachdb/cockroach/pkg/util/stop"
	"github.com/cockroachdb/cockroach/pkg/util/syncutil"
+	"github.com/cockroachdb/errors"
	"github.com/stretchr/testify/require"
)

// Set up an impossibly low cpu threshold; test clusters
// do not seem to record cpu utilization per replica.
const lowCPUThreshold = time.Duration(-1)
const highCPUThreshold = time.Second
-const defaultTestWait = time.Second
const lowDelay = 50 * time.Millisecond
const highDelay = time.Minute

// TestHotRangeLogger tests that hot ranges stats are logged per node.
// It uses system ranges to verify behavior.
func TestHotRangeLoggerSettings(t *testing.T) {
-	skip.WithIssue(t, 145412)
+	defer leaktest.AfterTest(t)()
+
	skip.UnderRace(t)
-	skip.UnderStress(t)
	ctx := context.Background()

-	// We only want to run this once within the suite, as the
-	// subsystem we depend on takes on the order of whole seconds
-	// to warm.
-	s, spy, teardown := setupTestServer(t, ctx)
+	settings, spy, teardown := setupTestServer(t, ctx)
	defer teardown()

	for _, test := range []struct {
		enabled            bool
		tickerInterval     time.Duration
		logSettingInterval time.Duration
-		waitFor            time.Duration
		logCPUThreshold    time.Duration
		hasLogs            bool
	}{
		// Tests the straightforward use case, where a minimal interval,
		// a minimal check loop, and a zero threshold should
		// result in multiple logs.
-		{true, lowDelay, lowDelay, defaultTestWait, lowCPUThreshold, true},
+		{true, lowDelay, lowDelay, lowCPUThreshold, true},

		// This test is the same as the default case, except the
		// cluster setting which controls logging is off.
-		{false, lowDelay, lowDelay, defaultTestWait, lowCPUThreshold, false},
+		{false, lowDelay, lowDelay, lowCPUThreshold, false},

		// This test validates that even when we check on a low cadence,
		// if the threshold is not passed and the interval is long,
		// no logs will appear.
-		{true, lowDelay, highDelay, defaultTestWait, highCPUThreshold, false},
+		{true, lowDelay, highDelay, highCPUThreshold, false},

		// This test validates that even if the interval is long,
		// if the cpu threshold is low, and it's checked, the system
		// will produce logs.
-		{true, lowDelay, highDelay, defaultTestWait, lowCPUThreshold, true},
+		{true, lowDelay, highDelay, lowCPUThreshold, true},

		// This test validates that with a high check cadence, no logs
		// will appear, even if the interval and thresholds are low.
-		{true, highDelay, lowDelay, defaultTestWait, lowCPUThreshold, false},
+		{true, highDelay, lowDelay, lowCPUThreshold, false},

		// This test checks that if there's a low logging interval,
		// logs will still appear even if the cpuThreshold is high.
-		{true, lowDelay, lowDelay, defaultTestWait, highCPUThreshold, true},
+		{true, lowDelay, lowDelay, highCPUThreshold, true},
	} {
		t.Run(fmt.Sprintf("settings tests %v", test), func(t *testing.T) {
-			setupTest(ctx, s.ClusterSettings(), test.enabled, test.logSettingInterval, test.tickerInterval, test.logCPUThreshold, spy)
-			time.Sleep(test.waitFor)
-			require.Equal(t, test.hasLogs, hasNonZeroQPSRange(spy.Logs()))
+			setupTest(ctx, settings, test.enabled, test.logSettingInterval, test.tickerInterval, test.logCPUThreshold, spy)
+			testutils.SucceedsSoon(t, func() error {
+				actual := hasNonZeroQPSRange(spy.Logs())
+				if test.hasLogs != actual {
+					return errors.Errorf("expected hasLogs %v, got %v", test.hasLogs, actual)
+				}
+				return nil
+			})
		})
	}
@@ -105,15 +102,23 @@ func TestHotRangeLoggerSettings(t *testing.T) {
		}

		// without a limit set, we should see many ranges.
-		setupTest(ctx, s.ClusterSettings(), true, lowDelay, lowDelay, lowCPUThreshold, spy)
-		time.Sleep(time.Second)
-		require.Greater(t, countSeenRanges(spy.Logs()), 1)
+		setupTest(ctx, settings, true, lowDelay, lowDelay, lowCPUThreshold, spy)
+		testutils.SucceedsSoon(t, func() error {
+			if actual := countSeenRanges(spy.Logs()); actual <= 1 {
+				return fmt.Errorf("expected >1 range, got %d", actual)
+			}
+			return nil
+		})

		// with a limit, only one range should show up.
		structlogging.ReportTopHottestRanges = 1
-		setupTest(ctx, s.ClusterSettings(), true, lowDelay, lowDelay, lowCPUThreshold, spy)
-		time.Sleep(time.Second)
-		require.Equal(t, countSeenRanges(spy.Logs()), 1)
+		setupTest(ctx, settings, true, lowDelay, lowDelay, lowCPUThreshold, spy)
+		testutils.SucceedsSoon(t, func() error {
+			if actual := countSeenRanges(spy.Logs()); actual != 1 {
+				return fmt.Errorf("expected 1 range, got %d", actual)
+			}
+			return nil
+		})
	})
}
@@ -125,16 +130,12 @@ func TestHotRangeLoggerSettings(t *testing.T) {
// - For app tenants, a job is initialized for the hot ranges
//   logger, whereas for the system tenant it runs as a task.
func TestHotRangeLoggerMultitenant(t *testing.T) {
+	defer leaktest.AfterTest(t)()
+
	skip.UnderRace(t)
	ctx := context.Background()
-	s, spy, teardown := setupTestServer(t, ctx)
-	tenantID := roachpb.MustMakeTenantID(2)
-	tt, err := s.TenantController().StartTenant(ctx, base.TestTenantArgs{
-		TenantID: tenantID,
-	})
+	_, spy, teardown := setupTestServer(t, ctx)
	spy.Logs()
-	require.NoError(t, err)
-	require.NotNil(t, tt)
	// TODO (brian): the jobs system isn't registering this correctly;
	// this will be fixed in a short follow-up PR.
	defer teardown()
@@ -188,65 +189,78 @@ func (spy *hotRangesLogSpy) Reset() {
	spy.mu.logs = nil
}

+type testHotRangeGetter struct{}
+
+func (t testHotRangeGetter) HotRangesV2(
+	ctx context.Context, req *serverpb.HotRangesRequest,
+) (*serverpb.HotRangesResponseV2, error) {
+	if req.PerNodeLimit == 1 {
+		return &serverpb.HotRangesResponseV2{
+			Ranges: []*serverpb.HotRangesResponseV2_HotRange{
+				{
+					RangeID:          1,
+					CPUTimePerSecond: float64(100 * time.Millisecond),
+					QPS:              float64(100),
+				},
+			},
+		}, nil
+	}
+	return &serverpb.HotRangesResponseV2{
+		Ranges: []*serverpb.HotRangesResponseV2_HotRange{
+			{
+				RangeID:          1,
+				CPUTimePerSecond: float64(100 * time.Millisecond),
+				QPS:              float64(100),
+			},
+			{
+				RangeID:          2,
+				CPUTimePerSecond: float64(1300 * time.Millisecond),
+				QPS:              float64(100),
+			},
+			{
+				RangeID:          3,
+				CPUTimePerSecond: float64(900 * time.Millisecond),
+				QPS:              float64(100),
+			},
+		},
+	}, nil
+}
+
+var _ structlogging.HotRangeGetter = testHotRangeGetter{}
+
// setupTestServer is a somewhat lengthy warmup process
// to ensure that the hot ranges tests are ready to run.
// It sets up a cluster, runs it until the hot range stats are
// warm by dialing the knobs to noisy, and checking for output,
// then redials the knobs back to quiet so the test can take over.
func setupTestServer(
	t *testing.T, ctx context.Context,
-) (serverutils.TestServerInterface, *hotRangesLogSpy, func()) {
+) (*cluster.Settings, *hotRangesLogSpy, func()) {
	sc := log.ScopeWithoutShowLogs(t)
	spy := &hotRangesLogSpy{t: t}

	// override internal settings.
	structlogging.ReportTopHottestRanges = 1000
	structlogging.CheckInterval = 100 * time.Millisecond

-	s := serverutils.StartServerOnly(t, base.TestServerArgs{
-		DefaultTestTenant: base.TestControlsTenantsExplicitly,
-		Knobs: base.TestingKnobs{
-			Store: &kvserver.StoreTestingKnobs{
-				ReplicaPlannerKnobs: plan.ReplicaPlannerTestingKnobs{
-					DisableReplicaRebalancing: true,
-				},
-			},
-		},
-	})
-
-	leakChecker := leaktest.AfterTest(t)
	logInterceptor := log.InterceptWith(ctx, spy)
-	stopper := s.Stopper()
+	stopper := stop.NewStopper()
+	settings := cluster.MakeTestingClusterSettings()
	teardown := func() {
		stopper.Stop(ctx)
		sc.Close(t)
		logInterceptor()
-		leakChecker()
	}

-	ts := s.ApplicationLayer()
-
	// lower settings so that we can wait for the stats to warm.
-	structlogging.TelemetryHotRangesStatsEnabled.Override(ctx, &ts.ClusterSettings().SV, true)
-	structlogging.TelemetryHotRangesStatsInterval.Override(ctx, &ts.ClusterSettings().SV, time.Millisecond)
-	structlogging.TelemetryHotRangesStatsLoggingDelay.Override(ctx, &ts.ClusterSettings().SV, 0*time.Millisecond)
-
-	// simulate some queries.
-	for range 1000 {
-		_, err := ts.SQLConn(t).Exec("SELECT * FROM system.namespace")
-		require.NoError(t, err)
-	}
-
-	testutils.SucceedsSoon(t, func() error {
-		logs := spy.Logs()
+	structlogging.TelemetryHotRangesStatsEnabled.Override(ctx, &settings.SV, true)
+	structlogging.TelemetryHotRangesStatsInterval.Override(ctx, &settings.SV, time.Millisecond)
+	structlogging.TelemetryHotRangesStatsLoggingDelay.Override(ctx, &settings.SV, 0*time.Millisecond)

-		if hasNonZeroQPSRange(logs) {
-			return nil
-		}
-		return errors.New("waited too long for the synthetic data")
-	})
+	err := structlogging.StartHotRangesLoggingScheduler(ctx, stopper, testHotRangeGetter{}, settings, nil)
+	require.NoError(t, err)

-	return s, spy, teardown
+	return settings, spy, teardown
}

// Utility function which generally indicates that the hot ranges
@@ -273,8 +287,7 @@ func setupTest(
	structlogging.TelemetryHotRangesStatsInterval.Override(ctx, &st.SV, logInterval)
	structlogging.TelemetryHotRangesStatsCPUThreshold.Override(ctx, &st.SV, logCPUThreshold)
	structlogging.CheckInterval = tickerInterval
-	// wait for the activity from the previous test to drain.
-	time.Sleep(100 * time.Millisecond)
	structlogging.TestLoopChannel <- struct{}{}
+	log.FlushAllSync()
	spy.Reset()
}