From cd07a0f8e5107292a0d7d0192feaf60420b02167 Mon Sep 17 00:00:00 2001 From: Mira Radeva Date: Wed, 7 Jan 2026 13:09:51 -0500 Subject: [PATCH] kvnemesis: disable SQL ops in safety mode Currently, there is a single kvnemesis operation that executes via SQL: `ToggleGlobalReads`. We have seen this operation get stuck and cause the test to time out under safety mode. The expected behavior is that any stuck operations would time out, but it seems like there is a context cancellation propagation issue, most likely in lib/pq, but not confirmed. This commit disables `ToggleGlobalReads` in safety mode to reduce test failures. This change would also help confirm that this is the only operation susceptible to the hanging behavior. Fixes: #160293 Release note: None --- pkg/kv/kvnemesis/kvnemesis_test.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pkg/kv/kvnemesis/kvnemesis_test.go b/pkg/kv/kvnemesis/kvnemesis_test.go index 2a8dd33859b3..7e423d8a4a01 100644 --- a/pkg/kv/kvnemesis/kvnemesis_test.go +++ b/pkg/kv/kvnemesis/kvnemesis_test.go @@ -514,6 +514,11 @@ func TestKVNemesisMultiNode_Partition_Safety(t *testing.T) { testGeneratorConfig: func(cfg *GeneratorConfig) { cfg.Ops.Fault.AddNetworkPartition = 1 cfg.Ops.Fault.RemoveNetworkPartition = 1 + // This is the only operation that executes via SQL. As such, we suspect + // context cancellations are not always respected, resulting in the test + // hanging. The current suspect is lib/pq. See #160293. + // TODO(mira): Consider toggling global reads by editing the span config. + cfg.Ops.ChangeZone.ToggleGlobalReads = 0 }, }) }