@@ -7,6 +7,7 @@ package metamorphic
77import (
88 "fmt"
99 "io"
10+ "math/rand/v2"
1011 "os"
1112 "path"
1213 "runtime/debug"
@@ -321,10 +322,11 @@ func (t *Test) minFMV() pebble.FormatMajorVersion {
321322 return minVersion
322323}
323324
324- func (t * Test ) restartDB (dbID objID ) error {
325+ func (t * Test ) restartDB (dbID objID , shouldCrashDuringOpen bool ) error {
325326 db := t .getDB (dbID )
326- // If strictFS is not used, we use pebble.NoSync for writeOpts, so we can't
327- // restart the database (even if we don't revert to synced data).
327+ // If strictFS is not used, no-op since we end up using pebble.NoSync for
328+ // writeOpts. In the case of pebble.NoSync, we can't restart the database
329+ // even if we don't revert to synced data.
328330 if ! t .testOpts .strictFS {
329331 return nil
330332 }
@@ -348,6 +350,19 @@ func (t *Test) restartDB(dbID objID) error {
348350 }
349351 }
350352 t .opts .FS = crashFS
353+ var slowFS * errorfs.FS
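354+ // If we should crash during Open, inject some latency into the filesystem
355+ // so that the first Open is slow enough for us to capture some arbitrary
356+ // intermediate state.
// NOTE(review): in the mean computed below, `10*int(time.Millisecond)` binds
// tighter than the addition, so the value is 10ms plus 0-19ns rather than a
// mean somewhere in 10-29ms. If the latter was intended, the sum needs to be
// formed first: time.Duration(rand.IntN(20)+10) * time.Millisecond.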
357+ if shouldCrashDuringOpen {
358+ seed := time .Now ().UnixNano ()
359+ t .opts .Logger .Infof ("seed %d" , seed )
360+ mean := time .Duration (rand .IntN (20 ) + 10 * int (time .Millisecond ))
361+ t .opts .Logger .Infof ("Injecting mean %s of latency with p=%.3f" , mean , 1.0 )
362+ slowFS = errorfs .Wrap (crashFS ,
363+ errorfs .RandomLatency (errorfs .Randomly (1.0 , seed ), mean , seed , time .Second ))
364+ t .opts .FS = slowFS
365+ }
351366 t .opts .WithFSDefaults ()
352367 // We want to set the new FS in testOpts too, so they are propagated to the
353368 // TestOptions that were used with metamorphic.New().
@@ -357,6 +372,9 @@ func (t *Test) restartDB(dbID objID) error {
357372 t .testOpts .Opts .WALFailover .Secondary .FS = t .opts .FS
358373 }
359374
375+ secondOpenDone := make (chan struct {})
376+ firstOpenDone := make (chan struct {})
377+ errChan := make (chan error )
360378 // TODO(jackson): Audit errorRate and ensure custom options' hooks semantics
361379 // are well defined within the context of retries.
362380 err := t .withRetries (func () (err error ) {
@@ -373,15 +391,66 @@ func (t *Test) restartDB(dbID objID) error {
373391 dir = path .Join (dir , fmt .Sprintf ("db%d" , dbID .slot ()))
374392 }
375393 o := t .finalizeOptions ()
394+ if shouldCrashDuringOpen {
395+ go t .simulateCrashDuringOpen (dbID , slowFS , secondOpenDone , firstOpenDone , errChan )
396+ }
376397 t .dbs [dbID .slot ()- 1 ], err = pebble .Open (dir , & o )
377- if err != nil {
378- return err
398+ if shouldCrashDuringOpen {
399+ firstOpenDone <- struct {}{}
379400 }
380401 return err
381402 })
403+ if shouldCrashDuringOpen {
404+ <- secondOpenDone
405+ select {
406+ case err = <- errChan :
407+ if err != nil {
408+ return err
409+ }
410+ default :
411+ }
412+ }
382413 return err
383414}
384415
// simulateCrashDuringOpen is started as a goroutine by restartDB right before
// the first pebble.Open. It sleeps briefly, takes a crash clone of slowFS (the
// latency-injecting filesystem built by restartDB), installs the clone as
// t.opts.FS, waits for a value on firstOpenDone, closes the DB stored in this
// dbID's slot of t.dbs, and then reopens that slot on top of the clone.
//
// A value is sent on secondOpenDone whenever the function returns (deferred
// send). Errors from CrashClone, Close and the second Open are sent on errChan.
//
// NOTE(review): restartDB creates all three channels unbuffered and only polls
// errChan (select with a default case) after it has received from
// secondOpenDone. A send on errChan from here therefore appears to have no
// receiver and would block before the deferred secondOpenDone send can run --
// confirm whether errChan should be buffered.
// NOTE(review): returning on a CrashClone error skips the receive from
// firstOpenDone, while restartDB sends on firstOpenDone after its Open call --
// confirm that this path cannot hang.
// NOTE(review): t.opts.FS is assigned here while restartDB may still be inside
// the first Open on another goroutine -- confirm nothing reads t.opts
// concurrently.
416+ func (t * Test ) simulateCrashDuringOpen (
417+ dbID objID , slowFS * errorfs.FS , secondOpenDone , firstOpenDone chan struct {}, errChan chan error ,
418+ ) {
// Tell restartDB that this goroutine is done, on every return path.
419+ defer func () { secondOpenDone <- struct {}{} }()
420+
421+ // Give the concurrent first Open a fixed 30ms head start so that it has made
// some progress before the filesystem is cloned; this is a plain delay and is
// not synchronized with Open in any way.
422+ time .Sleep (30 * time .Millisecond )
423+
424+ // Resolve the DB directory: when t.dbs holds more than one DB, each one lives
// in a "db<slot>" subdirectory of t.dir.
425+ dir := t .dir
426+ if len (t .dbs ) > 1 {
427+ dir = path .Join (dir , fmt .Sprintf ("db%d" , dbID .slot ()))
428+ }
// Take a crash clone of slowFS with UnsyncedDataPercent set to 0.
429+ crashCloneFS , err := slowFS .CrashClone (vfs.CrashCloneCfg {UnsyncedDataPercent : 0 })
430+ if err != nil {
431+ errChan <- err
432+ return
433+ }
// From here on the test's options point at the crash clone.
434+ t .opts .FS = crashCloneFS
435+
436+ // Finalize the options for the second Open after t.opts.FS has been switched.
437+ o := t .finalizeOptions ()
438+
439+ // After the first Open has completed, close the resulting DB and open the
440+ // second DB.
441+ <- firstOpenDone
442+ err = t .dbs [dbID .slot ()- 1 ].Close ()
443+ if err != nil {
444+ errChan <- err
445+ return
446+ }
// The result of the second Open replaces the first DB in the same slot.
447+ t .dbs [dbID .slot ()- 1 ], err = pebble .Open (dir , & o )
448+ if err != nil {
449+ errChan <- err
450+ return
451+ }
452+ }
453+
385454func (t * Test ) saveInMemoryDataInternal () error {
386455 if rootFS := vfs .Root (t .opts .FS ); rootFS != vfs .Default {
387456 // t.opts.FS is an in-memory system; copy it to disk.
0 commit comments