Skip to content

Commit 9e41a32

Browse files
committed
kvfollowerreadsccl: maybe deflake TestBoundedStalenessDataDriven
This test determines what events occur by parsing the trace. In some cases, the parsing it was using to determine a "local read followed by remote leaseholder read" didn't account for changes in the potential trace messages encountered when leader leases are enabled. Here, I widen the scope of the trace parsing. Locally under stress this elliminated the previously encountered failure: ``` datadriven.go:357: ... SNIP ... boundedstaleness/single_row:24: still running after 10.000889738s ... SNIP ... boundedstaleness_test.go:405: condition failed to evaluate within 45s: from boundedstaleness_test.go:436: not yet a match, output: 1 events (1 found): * event 1: colbatchscan trace on node_idx 2: local read datadriven.go:343: ``` Fixes #154710 Release note: None
1 parent 6816c43 commit 9e41a32

File tree

1 file changed

+38
-14
lines changed

1 file changed

+38
-14
lines changed

pkg/ccl/kvccl/kvfollowerreadsccl/boundedstaleness_test.go

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ var (
4545
)
4646
)
4747

48+
// fullTraceDebug is a flag that controls whether full traces are printed in the
49+
// case of some errors.
50+
const fullTraceDebug = false
51+
4852
func TestBoundedStalenessEnterpriseLicense(t *testing.T) {
4953
defer leaktest.AfterTest(t)()
5054
defer log.Scope(t).Close(t)
@@ -148,8 +152,10 @@ type boundedStalenessEvents struct {
148152
// A mutex is needed as the event handlers (onStmtTrace) can race.
149153
mu struct {
150154
syncutil.Mutex
151-
stmt string
152-
events []boundedStalenessDataDrivenEvent
155+
stmt string
156+
// Only populated if fullTraceDebug constant is true.
157+
traceForDebugging string
158+
events []boundedStalenessDataDrivenEvent
153159
}
154160
}
155161

@@ -167,6 +173,13 @@ func (bse *boundedStalenessEvents) clearEvents() {
167173
bse.mu.events = nil
168174
}
169175

176+
func (bse *boundedStalenessEvents) fullTrace() string {
177+
bse.mu.Lock()
178+
defer bse.mu.Unlock()
179+
180+
return bse.mu.traceForDebugging
181+
}
182+
170183
func (bse *boundedStalenessEvents) setStmt(s string) {
171184
bse.mu.Lock()
172185
defer bse.mu.Unlock()
@@ -240,17 +253,23 @@ func (bse *boundedStalenessEvents) onStmtTrace(nodeIdx int, rec tracingpb.Record
240253
defer bse.mu.Unlock()
241254

242255
if bse.mu.stmt != "" && bse.mu.stmt == stmt {
256+
if fullTraceDebug {
257+
bse.mu.traceForDebugging = rec.String()
258+
}
259+
243260
spans := make(map[tracingpb.SpanID]tracingpb.RecordedSpan)
244261
for _, sp := range rec {
245262
spans[sp.SpanID] = sp
263+
notLeaseHolderError := tracing.LogsContainMsg(sp, "[NotLeaseHolderError] lease held by different store;")
264+
notLeaseHolderError = notLeaseHolderError || tracing.LogsContainMsg(sp, "[NotLeaseHolderError] leader lease is not held locally, cannot determine validity;")
265+
246266
if sp.Operation == "dist sender send" && spans[sp.ParentSpanID].Operation == "colbatchscan" {
247267
bse.mu.events = append(bse.mu.events, &boundedStalenessTraceEvent{
248-
operation: spans[sp.ParentSpanID].Operation,
249-
nodeIdx: nodeIdx,
250-
localRead: tracing.LogsContainMsg(sp, kvbase.RoutingRequestLocallyMsg),
251-
followerRead: kvtestutils.OnlyFollowerReads(rec),
252-
remoteLeaseholderRead: tracing.LogsContainMsg(sp, "[NotLeaseHolderError] lease held by different store;") &&
253-
tracing.LogsContainMsg(sp, "trying next peer"),
268+
operation: spans[sp.ParentSpanID].Operation,
269+
nodeIdx: nodeIdx,
270+
localRead: tracing.LogsContainMsg(sp, kvbase.RoutingRequestLocallyMsg),
271+
followerRead: kvtestutils.OnlyFollowerReads(rec),
272+
remoteLeaseholderRead: notLeaseHolderError && tracing.LogsContainMsg(sp, "trying next peer"),
254273
})
255274
}
256275
}
@@ -261,10 +280,7 @@ func TestBoundedStalenessDataDriven(t *testing.T) {
261280
defer leaktest.AfterTest(t)()
262281
defer log.Scope(t).Close(t)
263282

264-
const msg = "1μs staleness reads may actually succeed due to the slow environment"
265-
skip.UnderStress(t, msg)
266-
skip.UnderRace(t, msg)
267-
skip.UnderDeadlock(t, msg)
283+
skip.UnderDuress(t, "1μs staleness reads may actually succeed due to the slow environment")
268284
defer ccl.TestingEnableEnterprise()()
269285

270286
ctx := context.Background()
@@ -426,14 +442,22 @@ func TestBoundedStalenessDataDriven(t *testing.T) {
426442
}
427443
}()
428444
if !followerRead {
445+
var trace string
446+
if fullTraceDebug {
447+
trace = fmt.Sprintf("\nfull_trace:\n%s", bse.fullTrace())
448+
}
429449
bse.clearEvents()
430-
return errors.AssertionFailedf("not follower reads found:\n%s", bse.String())
450+
return errors.AssertionFailedf("not follower reads found:\n%s%s", bse.String(), trace)
431451
}
432452
}
433453
if waitUntilMatch {
434454
if d.Expected != ret {
455+
var trace string
456+
if fullTraceDebug {
457+
trace = fmt.Sprintf("\nfull_trace:\n%s", bse.fullTrace())
458+
}
435459
bse.clearEvents()
436-
return errors.AssertionFailedf("not yet a match, output:\n%s\n", ret)
460+
return errors.AssertionFailedf("not yet a match, output:\n%s%s", ret, trace)
437461
}
438462
}
439463
return nil

0 commit comments

Comments
 (0)