Merge #148634

craig[bot] · stevendanna · craig[bot] · commit 221c3dfcf242 · 2025-07-07T23:02:18.000Z
148634: kvnemesis: first step towards a fuzzed KVNemesis r=miraradeva a=stevendanna

You can run this test with the go fuzzer with something like:

  go test ./pkg/kv/kvnemesis/ -test.fuzz=FuzzKVNemesisSingleNode \
  -test.fuzzcachedir=_fuzzcache -v -test.run=^$ \
  -tags crdb_test  -timeout=300m -parallel=4

It can also be run under bazel, but I have not yet sorted out all of the flags needed to get a coverage enabled build and to ensure that the failing test cases get written somewhere that can be referenced on subsequent runs.

The idea here is that the fuzzer provides a []byte that then determines the output of all random decisions in KVNemesis. This doesn't account for metamorphic decisions made outside of KVNemesis.

KVNemesis is a rather heavyweight test, which seemed to be a problem for running it reliably under go-fuzz; however, go-fuzz's poor diagnostics when the test worker crashes have made it hard to determine the exact cause so far.

Epic: none
Release note: None

Co-authored-by: Steven Danna <danna@cockroachlabs.com>
diff --git a/pkg/kv/kvnemesis/kvnemesis_test.go b/pkg/kv/kvnemesis/kvnemesis_test.go
@@ -209,16 +209,24 @@ func randWithSeed(
 	t interface {
 		Logf(string, ...interface{})
 		Helper()
-	}, seedOrZero int64,
+	}, cfg kvnemesisTestCfg,
 ) (*rand.Rand, counter, int64) {
 	t.Helper()
+
 	var rngSource rand.Source
-	if seedOrZero > 0 {
-		rngSource = rand.NewSource(seedOrZero)
+	seedOrZero := cfg.seedOverride
+	if cfg.randSource != nil {
+		rngSource = cfg.randSource
+		t.Logf("using config-supplied random source, seed ignored")
 	} else {
-		rngSource, seedOrZero = randutil.NewTestRandSource()
+		if seedOrZero > 0 {
+			rngSource = rand.NewSource(seedOrZero)
+		} else {
+			rngSource, seedOrZero = randutil.NewTestRandSource()
+		}
+		t.Logf("seed: %d", seedOrZero)
 	}
-	t.Logf("seed: %d", seedOrZero)
+
 	countingSource := newCountingSource(rngSource.(rand.Source64))
 	return rand.New(countingSource), countingSource, seedOrZero
 }
@@ -233,7 +241,7 @@ type tBridge struct {
 	ll logLogger
 }
 
-func newTBridge(t *testing.T) *tBridge {
+func newTBridge(t testing.TB) *tBridge {
 	// NB: we're not using t.TempDir() because we want these to survive
 	// on failure.
 	td, err := os.MkdirTemp(datapathutils.DebuggableTempDir(), "kvnemesis")
@@ -263,6 +271,7 @@ type kvnemesisTestCfg struct {
 	numNodes     int
 	numSteps     int
 	concurrency  int
+	randSource   rand.Source
 	seedOverride int64
 	// The two knobs below inject illegal lease index errors and, for the
 	// resulting reproposals, reproposal errors. The injection is stateful and
@@ -418,6 +427,50 @@ func TestKVNemesisMultiNode(t *testing.T) {
 	})
 }
 
+// FuzzKVNemesisSingleNode is an attempt to make it possible to run KVNemesis
+// with a coverage-guided fuzzer. It takes in a []byte as input and then uses
+// this to feed all random decisions in the test.
+func FuzzKVNemesisSingleNode(f *testing.F) {
+	defer leaktest.AfterTest(f)()
+	defer log.Scope(f).Close(f)
+
+	const (
+		// Set to > 0 to pre-generate corpus data.
+		corpusSize = 0
+		// I've set these to low values for now to at least get things running
+		// reliably. With all default settings the test runner fails without
+		// printing any useful info. I _think_ it might be the result of a
+		// hard-coded 10s timeout in the go-fuzz test worker.
+		numStep     = 10
+		concurrency = 1
+	)
+	for range corpusSize {
+		rndSource := randutil.NewRecordingRandSource(rand.NewSource(randutil.NewPseudoSeed()).(rand.Source64))
+		testKVNemesisImpl(f, kvnemesisTestCfg{
+			numNodes:                     1,
+			numSteps:                     numStep,
+			concurrency:                  concurrency,
+			randSource:                   rndSource,
+			invalidLeaseAppliedIndexProb: 0.2,
+			injectReproposalErrorProb:    0.2,
+			assertRaftApply:              true,
+		})
+		f.Add(rndSource.Output())
+	}
+
+	f.Fuzz(func(t *testing.T, data []byte) {
+		testKVNemesisImpl(t, kvnemesisTestCfg{
+			numNodes:                     1,
+			numSteps:                     numStep,
+			concurrency:                  concurrency,
+			randSource:                   randutil.NewFuzzRandSource(t, data),
+			invalidLeaseAppliedIndexProb: 0.2,
+			injectReproposalErrorProb:    0.2,
+			assertRaftApply:              true,
+		})
+	})
+}
+
 func TestKVNemesisMultiNode_LeaderLeases(t *testing.T) {
 	defer leaktest.AfterTest(t)()
 	defer log.Scope(t).Close(t)
@@ -434,7 +487,7 @@ func TestKVNemesisMultiNode_LeaderLeases(t *testing.T) {
 	})
 }
 
-func testKVNemesisImpl(t *testing.T, cfg kvnemesisTestCfg) {
+func testKVNemesisImpl(t testing.TB, cfg kvnemesisTestCfg) {
 	skip.UnderRace(t)
 
 	if !buildutil.CrdbTestBuild {
@@ -446,7 +499,7 @@ func testKVNemesisImpl(t *testing.T, cfg kvnemesisTestCfg) {
 
 	// Can set a seed here for determinism. This works best when the seed was
 	// obtained with cfg.concurrency=1.
-	rng, countingSource, seed := randWithSeed(t, cfg.seedOverride)
+	rng, countingSource, seed := randWithSeed(t, cfg)
 
 	// 4 nodes so we have somewhere to move 3x replicated ranges to.
 	ctx := context.Background()
@@ -513,7 +566,7 @@ func TestRunReproductionSteps(t *testing.T) {
 	// Paste a repro as printed by kvnemesis here.
 }
 
-func dumpRaftLogsOnFailure(t *testing.T, dir string, srvs []serverutils.TestServerInterface) {
+func dumpRaftLogsOnFailure(t testing.TB, dir string, srvs []serverutils.TestServerInterface) {
 	if !t.Failed() {
 		return
 	}
diff --git a/pkg/testutils/lint/lint_test.go b/pkg/testutils/lint/lint_test.go
@@ -1395,6 +1395,7 @@ func TestLint(t *testing.T) {
 			"--",
 			"*.go",
 			":!testutils/skip/skip.go",
+			":!util/randutil/rand.go",
 			":!cmd/roachtest/*.go",
 			":!acceptance/compose/*.go",
 			":!util/syncutil/*.go",
diff --git a/pkg/util/randutil/rand.go b/pkg/util/randutil/rand.go
@@ -13,6 +13,7 @@ import (
 	"math/rand"
 	"runtime"
 	"strings"
+	"testing"
 	"time"
 	_ "unsafe" // required by go:linkname
 
@@ -285,3 +286,100 @@ func getTestName() string {
 	}
 	return ""
 }
+
+// FuzzRandSource is a rand.Source whose output is completely determined by the
+// input bytes. This can be used by tests that make random decisions using an
+// RNG and want that to be driven by the fuzzer.
+//
+// Once the input runs out, the given test is marked as skipped and 42 is
+// returned.
+//
+// TODO(ssd): My suspicion is that this is better than simply allowing the
+// fuzzer to set the seed of our random number generator. With this, the
+// fuzzer's next step can change a single decision without affecting all
+// previous decisions in the test, giving it some ability to direct its
+// exploration.
+type FuzzRandSource struct {
+	t     testing.TB
+	input []byte
+}
+
+var _ rand.Source64 = (*FuzzRandSource)(nil)
+
+func NewFuzzRandSource(t testing.TB, input []byte) *FuzzRandSource {
+	return &FuzzRandSource{
+		t:     t,
+		input: input,
+	}
+}
+
+func (s *FuzzRandSource) getBytes(n int) []byte {
+	if len(s.input) < n {
+		return nil
+	}
+	ret := s.input[0:n]
+	s.input = s.input[n:]
+	return ret
+}
+
+const (
+	uint64Size      = 8
+	rngMask         = (1 << 63) - 1
+	outOfInputValue = 42
+)
+
+func (s *FuzzRandSource) Int63() int64 {
+	return int64(s.Uint64() & rngMask)
+}
+
+func (s *FuzzRandSource) Uint64() uint64 {
+	data := s.getBytes(uint64Size)
+	if data == nil {
+		s.t.Skip("insufficient input bytes")
+		return outOfInputValue
+	}
+	return binary.LittleEndian.Uint64(data)
+}
+
+// Seed does nothing for FuzzRandSource.
+func (s *FuzzRandSource) Seed(int64) {}
+
+// RecordingRandSource records the output of the inner source. The intended use
+// is to produce a "corpus" for a fuzzer that will use FuzzRandSource.
+//
+// No work has been put into allocating the output efficiently.
+type RecordingRandSource struct {
+	inner  rand.Source64
+	output []byte
+}
+
+func NewRecordingRandSource(source rand.Source64) *RecordingRandSource {
+	return &RecordingRandSource{
+		inner:  source,
+		output: make([]byte, 0, uint64Size*32),
+	}
+}
+
+func (s *RecordingRandSource) putUint64(n uint64) {
+	start := len(s.output)
+	s.output = append(s.output, make([]byte, uint64Size)...)
+	binary.LittleEndian.PutUint64(s.output[start:start+uint64Size], n)
+}
+
+func (s *RecordingRandSource) Uint64() uint64 {
+	ret := s.inner.Uint64()
+	s.putUint64(ret)
+	return ret
+}
+
+func (s *RecordingRandSource) Int63() int64 {
+	return int64(s.Uint64() & rngMask)
+}
+
+func (s *RecordingRandSource) Seed(seed int64) {
+	s.inner.Seed(seed)
+}
+
+func (s *RecordingRandSource) Output() []byte {
+	return s.output
+}