
Commit 6ca7d2f

server: refactor the sticky engine registry into a VFS registry
The 'StickyInMemEngineRegistry' has historically been the method of preserving local physical state within tests across node/TestCluster restarts. This registry predates Pebble and its pure-Go VFS interface.

This commit refactors the sticky engine registry into a registry of virtual filesystems. This improves test coverage by exercising storage engine teardown and recovery code. The previous behavior of reusing the storage Engine without closing and re-opening is still supported with the server.ReuseEngines option.

Close cockroachdb#107177.

Epic: None
Release note: None
1 parent 602bc79 commit 6ca7d2f

29 files changed (+522 -527 lines)
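
The diffs that follow are mostly mechanical renames around a single pattern: a test pins the in-memory virtual filesystem via the registry testing knob and a StickyVFSID on the store spec, while the storage engine itself is now closed and re-opened across restarts. A minimal sketch of that wiring, assembled from the hunks below; the helper name sketchServerArgs is hypothetical, everything else appears in this commit:

import (
	"github.com/cockroachdb/cockroach/pkg/base"
	"github.com/cockroachdb/cockroach/pkg/server"
)

func sketchServerArgs() base.TestServerArgs {
	// Keep the in-memory VFS alive across a TestCluster restart; the storage
	// engine itself is torn down and re-opened rather than kept open.
	reg := server.NewStickyVFSRegistry()
	return base.TestServerArgs{
		Knobs: base.TestingKnobs{
			Server: &server.TestingKnobs{
				StickyVFSRegistry: reg,
			},
		},
		StoreSpecs: []base.StoreSpec{
			{InMemory: true, StickyVFSID: "1"},
		},
	}
}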

pkg/base/store_spec.go

Lines changed: 5 additions & 6 deletions
@@ -249,12 +249,11 @@ type StoreSpec struct {
 	BallastSize *SizeSpec
 	InMemory bool
 	Attributes roachpb.Attributes
-	// StickyInMemoryEngineID is a unique identifier associated with a given
-	// store which will remain in memory even after the default Engine close
-	// until it has been explicitly cleaned up by CleanupStickyInMemEngine[s]
-	// or the process has been terminated.
-	// This only applies to in-memory storage engine.
-	StickyInMemoryEngineID string
+	// StickyVFSID is a unique identifier associated with a given store which
+	// will preserve the in-memory virtual file system (VFS) even after the
+	// storage engine has been closed. This only applies to in-memory storage
+	// engine.
+	StickyVFSID string
 	// UseFileRegistry is true if the "file registry" store version is desired.
 	// This is set by CCL code when encryption-at-rest is in use.
 	UseFileRegistry bool

pkg/cli/debug_recover_loss_of_quorum_test.go

Lines changed: 5 additions & 9 deletions
@@ -328,19 +328,18 @@ func TestStageVersionCheck(t *testing.T) {
 	})
 	defer c.Cleanup()
 
-	storeReg := server.NewStickyInMemEnginesRegistry()
-	defer storeReg.CloseAllStickyInMemEngines()
+	storeReg := server.NewStickyVFSRegistry()
 	tc := testcluster.NewTestCluster(t, 4, base.TestClusterArgs{
 		ReplicationMode: base.ReplicationManual,
 		ServerArgsPerNode: map[int]base.TestServerArgs{
 			0: {
 				Knobs: base.TestingKnobs{
 					Server: &server.TestingKnobs{
-						StickyEngineRegistry: storeReg,
+						StickyVFSRegistry: storeReg,
 					},
 				},
 				StoreSpecs: []base.StoreSpec{
-					{InMemory: true, StickyInMemoryEngineID: "1"},
+					{InMemory: true, StickyVFSID: "1"},
 				},
 			},
 		},
@@ -378,7 +377,7 @@ func TestStageVersionCheck(t *testing.T) {
 	})
 	require.NoError(t, err, "force local must fix incorrect version")
 	// Check that stored plan has version matching cluster version.
-	fs, err := storeReg.GetUnderlyingFS(base.StoreSpec{InMemory: true, StickyInMemoryEngineID: "1"})
+	fs, err := storeReg.Get(base.StoreSpec{InMemory: true, StickyVFSID: "1"})
 	require.NoError(t, err, "failed to get shared store fs")
 	ps := loqrecovery.NewPlanStore("", fs)
 	p, ok, err := ps.LoadPlan()
@@ -434,9 +433,6 @@ func TestHalfOnlineLossOfQuorumRecovery(t *testing.T) {
 	listenerReg := listenerutil.NewListenerRegistry()
 	defer listenerReg.Close()
 
-	storeReg := server.NewStickyInMemEnginesRegistry()
-	defer storeReg.CloseAllStickyInMemEngines()
-
 	// Test cluster contains 3 nodes that we would turn into a single node
 	// cluster using loss of quorum recovery. To do that, we will terminate
 	// two nodes and run recovery on remaining one. Restarting node should
@@ -452,7 +448,7 @@ func TestHalfOnlineLossOfQuorumRecovery(t *testing.T) {
 		sa[i] = base.TestServerArgs{
 			Knobs: base.TestingKnobs{
 				Server: &server.TestingKnobs{
-					StickyEngineRegistry: storeReg,
+					StickyVFSRegistry: server.NewStickyVFSRegistry(),
 				},
 			},
 			StoreSpecs: []base.StoreSpec{

pkg/cli/democluster/demo_cluster.go

Lines changed: 8 additions & 8 deletions
@@ -87,7 +87,7 @@ type transientCluster struct {
 	adminPassword string
 	adminUser     username.SQLUsername
 
-	stickyEngineRegistry server.StickyInMemEnginesRegistry
+	stickyVFSRegistry server.StickyVFSRegistry
 
 	drainAndShutdown func(ctx context.Context, adminClient serverpb.AdminClient) error
 
@@ -210,7 +210,7 @@ func NewDemoCluster(
 		}
 	}
 
-	c.stickyEngineRegistry = server.NewStickyInMemEnginesRegistry()
+	c.stickyVFSRegistry = server.NewStickyVFSRegistry()
 	return c, nil
 }
 
@@ -314,7 +314,7 @@ func (c *transientCluster) Start(ctx context.Context) (err error) {
 	// individual servers' Stop() methods have been registered
 	// via createAndAddNode() above.
 	c.stopper.AddCloser(stop.CloserFn(func() {
-		c.stickyEngineRegistry.CloseAllStickyInMemEngines()
+		c.stickyVFSRegistry.CloseAllEngines()
 	}))
 
 	// Start the remaining nodes asynchronously.
@@ -600,7 +600,7 @@ func (c *transientCluster) createAndAddNode(
 	}
 	args := c.demoCtx.testServerArgsForTransientCluster(
 		socketDetails, idx, joinAddr, c.demoDir,
-		c.stickyEngineRegistry,
+		c.stickyVFSRegistry,
 	)
 	if idx == 0 {
 		// The first node also auto-inits the cluster.
@@ -878,11 +878,11 @@ func (demoCtx *Context) testServerArgsForTransientCluster(
 	serverIdx int,
 	joinAddr string,
 	demoDir string,
-	stickyEngineRegistry server.StickyInMemEnginesRegistry,
+	stickyVFSRegistry server.StickyVFSRegistry,
 ) base.TestServerArgs {
 	// Assign a path to the store spec, to be saved.
 	storeSpec := base.DefaultTestStoreSpec
-	storeSpec.StickyInMemoryEngineID = fmt.Sprintf("demo-server%d", serverIdx)
+	storeSpec.StickyVFSID = fmt.Sprintf("demo-server%d", serverIdx)
 
 	args := base.TestServerArgs{
 		SocketFile: sock.filename(),
@@ -902,7 +902,7 @@ func (demoCtx *Context) testServerArgsForTransientCluster(
 
 		Knobs: base.TestingKnobs{
 			Server: &server.TestingKnobs{
-				StickyEngineRegistry: stickyEngineRegistry,
+				StickyVFSRegistry: stickyVFSRegistry,
 			},
 			JobsTestingKnobs: &jobs.TestingKnobs{
 				// Allow the scheduler daemon to start earlier in demo.
@@ -1160,7 +1160,7 @@ func (c *transientCluster) startServerInternal(
 		socketDetails,
 		serverIdx,
 		c.firstServer.AdvRPCAddr(), c.demoDir,
-		c.stickyEngineRegistry)
+		c.stickyVFSRegistry)
 	srv, err := server.TestServerFactory.New(args)
 	if err != nil {
 		return 0, err

pkg/cli/democluster/demo_cluster_test.go

Lines changed: 19 additions & 19 deletions
@@ -51,7 +51,7 @@ func TestTestServerArgsForTransientCluster(t *testing.T) {
 	defer leaktest.AfterTest(t)()
 	defer log.Scope(t).Close(t)
 
-	stickyEnginesRegistry := server.NewStickyInMemEnginesRegistry()
+	stickyVFSRegistry := server.NewStickyVFSRegistry()
 
 	testCases := []struct {
 		serverIdx int
@@ -81,7 +81,7 @@ func TestTestServerArgsForTransientCluster(t *testing.T) {
 				EnableDemoLoginEndpoint: true,
 				Knobs: base.TestingKnobs{
 					Server: &server.TestingKnobs{
-						StickyEngineRegistry: stickyEnginesRegistry,
+						StickyVFSRegistry: stickyVFSRegistry,
 					},
 				},
 			},
@@ -106,7 +106,7 @@ func TestTestServerArgsForTransientCluster(t *testing.T) {
 				EnableDemoLoginEndpoint: true,
 				Knobs: base.TestingKnobs{
 					Server: &server.TestingKnobs{
-						StickyEngineRegistry: stickyEnginesRegistry,
+						StickyVFSRegistry: stickyVFSRegistry,
 					},
 				},
 			},
@@ -120,15 +120,15 @@ func TestTestServerArgsForTransientCluster(t *testing.T) {
 			demoCtx.CacheSize = tc.cacheSize
 			demoCtx.SQLPort = 1234
 			demoCtx.HTTPPort = 4567
-			actual := demoCtx.testServerArgsForTransientCluster(unixSocketDetails{}, tc.serverIdx, tc.joinAddr, "", stickyEnginesRegistry)
+			actual := demoCtx.testServerArgsForTransientCluster(unixSocketDetails{}, tc.serverIdx, tc.joinAddr, "", stickyVFSRegistry)
 			stopper := actual.Stopper
 			defer stopper.Stop(context.Background())
 
 			assert.Len(t, actual.StoreSpecs, 1)
 			assert.Equal(
 				t,
 				fmt.Sprintf("demo-server%d", tc.serverIdx),
-				actual.StoreSpecs[0].StickyInMemoryEngineID,
+				actual.StoreSpecs[0].StickyVFSID,
 			)
 
 			// We cannot compare these.
@@ -169,13 +169,13 @@ func TestTransientClusterSimulateLatencies(t *testing.T) {
 
 	// Setup the transient cluster.
 	c := transientCluster{
-		demoCtx:              demoCtx,
-		stopper:              stop.NewStopper(),
-		demoDir:              certsDir,
-		stickyEngineRegistry: server.NewStickyInMemEnginesRegistry(),
-		infoLog:              log.Infof,
-		warnLog:              log.Warningf,
-		shoutLog:             log.Ops.Shoutf,
+		demoCtx:           demoCtx,
+		stopper:           stop.NewStopper(),
+		demoDir:           certsDir,
+		stickyVFSRegistry: server.NewStickyVFSRegistry(),
+		infoLog:           log.Infof,
+		warnLog:           log.Warningf,
+		shoutLog:          log.Ops.Shoutf,
 	}
 	// Stop the cluster when the test exits, including when it fails.
 	// This also calls the Stop() method on the stopper, and thus
@@ -281,13 +281,13 @@ func TestTransientClusterMultitenant(t *testing.T) {
 
 	// Setup the transient cluster.
 	c := transientCluster{
-		demoCtx:              demoCtx,
-		stopper:              stop.NewStopper(),
-		demoDir:              certsDir,
-		stickyEngineRegistry: server.NewStickyInMemEnginesRegistry(),
-		infoLog:              log.Infof,
-		warnLog:              log.Warningf,
-		shoutLog:             log.Ops.Shoutf,
+		demoCtx:           demoCtx,
+		stopper:           stop.NewStopper(),
+		demoDir:           certsDir,
+		stickyVFSRegistry: server.NewStickyVFSRegistry(),
+		infoLog:           log.Infof,
+		warnLog:           log.Warningf,
+		shoutLog:          log.Ops.Shoutf,
 	}
 	// Stop the cluster when the test exits, including when it fails.
 	// This also calls the Stop() method on the stopper, and thus

pkg/kv/kvserver/client_lease_test.go

Lines changed: 13 additions & 16 deletions
@@ -783,8 +783,7 @@ func TestLeasePreferencesRebalance(t *testing.T) {
 func TestLeaseholderRelocate(t *testing.T) {
 	defer leaktest.AfterTest(t)()
 	defer log.Scope(t).Close(t)
-	stickyRegistry := server.NewStickyInMemEnginesRegistry()
-	defer stickyRegistry.CloseAllStickyInMemEngines()
+	stickyRegistry := server.NewStickyVFSRegistry()
 	ctx := context.Background()
 	manualClock := hlc.NewHybridManualClock()
 
@@ -809,14 +808,14 @@ func TestLeaseholderRelocate(t *testing.T) {
 			Locality: localities[i],
 			Knobs: base.TestingKnobs{
 				Server: &server.TestingKnobs{
-					WallClock:            manualClock,
-					StickyEngineRegistry: stickyRegistry,
+					WallClock:         manualClock,
+					StickyVFSRegistry: stickyRegistry,
 				},
 			},
 			StoreSpecs: []base.StoreSpec{
 				{
-					InMemory:               true,
-					StickyInMemoryEngineID: strconv.FormatInt(int64(i), 10),
+					InMemory:    true,
+					StickyVFSID: strconv.FormatInt(int64(i), 10),
 				},
 			},
 		}
@@ -919,8 +918,7 @@ func TestLeasePreferencesDuringOutage(t *testing.T) {
 	skip.WithIssue(t, 88769, "flaky test")
 	defer log.Scope(t).Close(t)
 
-	stickyRegistry := server.NewStickyInMemEnginesRegistry()
-	defer stickyRegistry.CloseAllStickyInMemEngines()
+	stickyRegistry := server.NewStickyVFSRegistry()
 	ctx := context.Background()
 	manualClock := hlc.NewHybridManualClock()
 	// Place all the leases in the us.
@@ -956,7 +954,7 @@ func TestLeasePreferencesDuringOutage(t *testing.T) {
 			Server: &server.TestingKnobs{
 				WallClock:                 manualClock,
 				DefaultZoneConfigOverride: &zcfg,
-				StickyEngineRegistry:      stickyRegistry,
+				StickyVFSRegistry:         stickyRegistry,
 			},
 			Store: &kvserver.StoreTestingKnobs{
 				// The Raft leadership may not end up on the eu node, but it needs to
@@ -966,8 +964,8 @@ func TestLeasePreferencesDuringOutage(t *testing.T) {
 			},
 			StoreSpecs: []base.StoreSpec{
 				{
-					InMemory:               true,
-					StickyInMemoryEngineID: strconv.FormatInt(int64(i), 10),
+					InMemory:    true,
+					StickyVFSID: strconv.FormatInt(int64(i), 10),
 				},
 			},
 		}
@@ -1128,8 +1126,7 @@ func TestLeasesDontThrashWhenNodeBecomesSuspect(t *testing.T) {
 	kvserver.ExpirationLeasesOnly.Override(ctx, &st.SV, false) // override metamorphism
 
 	// Speed up lease transfers.
-	stickyRegistry := server.NewStickyInMemEnginesRegistry()
-	defer stickyRegistry.CloseAllStickyInMemEngines()
+	stickyRegistry := server.NewStickyVFSRegistry()
 	manualClock := hlc.NewHybridManualClock()
 	serverArgs := make(map[int]base.TestServerArgs)
 	numNodes := 4
@@ -1141,13 +1138,13 @@ func TestLeasesDontThrashWhenNodeBecomesSuspect(t *testing.T) {
 			Server: &server.TestingKnobs{
 				WallClock:                 manualClock,
 				DefaultZoneConfigOverride: &zcfg,
-				StickyEngineRegistry:      stickyRegistry,
+				StickyVFSRegistry:         stickyRegistry,
 			},
 		},
 		StoreSpecs: []base.StoreSpec{
 			{
-				InMemory:               true,
-				StickyInMemoryEngineID: strconv.FormatInt(int64(i), 10),
+				InMemory:    true,
+				StickyVFSID: strconv.FormatInt(int64(i), 10),
 			},
 		},
 	}

pkg/kv/kvserver/client_metrics_test.go

Lines changed: 5 additions & 5 deletions
@@ -265,17 +265,17 @@ func TestStoreMetrics(t *testing.T) {
 	defer log.Scope(t).Close(t)
 
 	ctx := context.Background()
-	stickyEngineRegistry := server.NewStickyInMemEnginesRegistry()
-	defer stickyEngineRegistry.CloseAllStickyInMemEngines()
+	stickyVFSRegistry := server.NewStickyVFSRegistry(server.ReuseEnginesDeprecated)
+	defer stickyVFSRegistry.CloseAllEngines()
 	const numServers int = 3
 	stickyServerArgs := make(map[int]base.TestServerArgs)
 	for i := 0; i < numServers; i++ {
 		stickyServerArgs[i] = base.TestServerArgs{
 			CacheSize: 1 << 20, /* 1 MiB */
 			StoreSpecs: []base.StoreSpec{
 				{
-					InMemory:               true,
-					StickyInMemoryEngineID: strconv.FormatInt(int64(i), 10),
+					InMemory:    true,
+					StickyVFSID: strconv.FormatInt(int64(i), 10),
 					// Specify a size to trigger the BlockCache in Pebble.
 					Size: base.SizeSpec{
 						InBytes: 512 << 20, /* 512 MiB */
@@ -284,7 +284,7 @@ func TestStoreMetrics(t *testing.T) {
 			},
 			Knobs: base.TestingKnobs{
 				Server: &server.TestingKnobs{
-					StickyEngineRegistry: stickyEngineRegistry,
+					StickyVFSRegistry: stickyVFSRegistry,
 				},
 				Store: &kvserver.StoreTestingKnobs{
 					DisableRaftLogQueue: true,
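
TestStoreMetrics above is the one caller shown here that opts out of the new close-and-reopen behavior: it constructs the registry with the engine-reuse option (spelled server.ReuseEnginesDeprecated in the hunk), which the commit message describes as keeping the old behavior of handing back the same storage Engine across restarts. A minimal sketch contrasting the two modes, using only names visible in this commit; the surrounding helper function is hypothetical:

import "github.com/cockroachdb/cockroach/pkg/server"

func sketchRegistries() {
	// Default mode: only the in-memory VFS is sticky; engines are closed on
	// server shutdown and re-opened on restart, exercising teardown and
	// recovery code paths.
	vfsOnly := server.NewStickyVFSRegistry()
	_ = vfsOnly

	// Legacy opt-in: reuse the same Engine instance across restarts, as
	// TestStoreMetrics does in the hunk above.
	engineReuse := server.NewStickyVFSRegistry(server.ReuseEnginesDeprecated)
	defer engineReuse.CloseAllEngines()
}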
