@@ -7,6 +7,7 @@ package metamorphic
77import (
88 "fmt"
99 "io"
10+ "math/rand/v2"
1011 "os"
1112 "path"
1213 "runtime/debug"
@@ -321,10 +322,12 @@ func (t *Test) minFMV() pebble.FormatMajorVersion {
321322 return minVersion
322323}
323324
324- func (t * Test ) restartDB (dbID objID ) error {
325+ // copy the WALs explicitly
326+ func (t * Test ) restartDB (dbID objID , shouldCrashDuringOpen bool ) error {
325327 db := t .getDB (dbID )
326- // If strictFS is not used, we use pebble.NoSync for writeOpts, so we can't
327- // restart the database (even if we don't revert to synced data).
328+ // If strictFS is not used, no-op since we end up using pebble.NoSync for
329+ // writeOpts. In the case of pebble.NoSync, we can't restart the database
330+ // even if we don't revert to synced data.
328331 if ! t .testOpts .strictFS {
329332 return nil
330333 }
@@ -348,15 +351,26 @@ func (t *Test) restartDB(dbID objID) error {
348351 }
349352 }
350353 t .opts .FS = crashFS
354+ var slowFS * errorfs.FS
355+ // If we should crash during Open, inject some latency into the filesystem
356+ // so that the first Open is slow enough for us to capture some arbitrary
357+ // intermediate state.
358+ if shouldCrashDuringOpen {
359+ seed := time .Now ().UnixNano ()
360+ t .opts .Logger .Infof ("seed %d" , seed )
361+ mean := time .Duration (rand .IntN (20 ) + 10 * int (time .Millisecond ))
362+ t .opts .Logger .Infof ("Injecting mean %s of latency with p=%.3f" , mean , 1.0 )
363+ slowFS = errorfs .Wrap (crashFS ,
364+ errorfs .RandomLatency (errorfs .Randomly (1.0 , seed ), mean , seed , time .Second ))
365+ t .opts .FS = slowFS
366+ }
351367 t .opts .WithFSDefaults ()
352368 // We want to set the new FS in testOpts too, so they are propagated to the
353369 // TestOptions that were used with metamorphic.New().
354370 t .testOpts .Opts .FS = t .opts .FS
355- if t .opts .WALFailover != nil {
356- t .opts .WALFailover .Secondary .FS = t .opts .FS
357- t .testOpts .Opts .WALFailover .Secondary .FS = t .opts .FS
358- }
359371
372+ secondOpenDone := make (chan struct {})
373+ firstOpenDone := make (chan struct {})
360374 // TODO(jackson): Audit errorRate and ensure custom options' hooks semantics
361375 // are well defined within the context of retries.
362376 err := t .withRetries (func () (err error ) {
@@ -373,15 +387,72 @@ func (t *Test) restartDB(dbID objID) error {
373387 dir = path .Join (dir , fmt .Sprintf ("db%d" , dbID .slot ()))
374388 }
375389 o := t .finalizeOptions ()
390+ fmt .Println ("HEREREEERERE shouldCrashDuringOpen" , shouldCrashDuringOpen )
391+ if shouldCrashDuringOpen {
392+ go func () {
393+ err = t .simulateCrashDuringOpen (dbID , slowFS , secondOpenDone , firstOpenDone )
394+ }()
395+ if err != nil {
396+ return err
397+ }
398+ }
376399 t .dbs [dbID .slot ()- 1 ], err = pebble .Open (dir , & o )
377- if err != nil {
378- return err
400+ if shouldCrashDuringOpen {
401+ firstOpenDone <- struct {}{}
379402 }
380403 return err
381404 })
405+ if shouldCrashDuringOpen {
406+ <- secondOpenDone
407+ }
382408 return err
383409}
384410
411+ func (t * Test ) simulateCrashDuringOpen (
412+ dbID objID , slowFS * errorfs.FS , secondOpenDone , firstOpenDone chan struct {},
413+ ) error {
414+ fmt .Println ("HEREREEERERE simulateCrashDuringOpen" )
415+ defer func () { secondOpenDone <- struct {}{} }()
416+
417+ // Wait a bit for the first Open to make some progress.
418+ time .Sleep (30 * time .Millisecond )
419+
420+ // Create a crash clone of the current filesystem state.
421+ dir := t .dir
422+ if len (t .dbs ) > 1 {
423+ dir = path .Join (dir , fmt .Sprintf ("db%d" , dbID .slot ()))
424+ }
425+ crashCloneFS , err := slowFS .CrashClone (vfs.CrashCloneCfg {UnsyncedDataPercent : 0 })
426+ if err != nil {
427+ return err
428+ }
429+
430+ // After the first Open has completed, close the resulting DB and open the
431+ // second DB.
432+ <- firstOpenDone
433+ err = t .dbs [dbID .slot ()- 1 ].Close ()
434+ if err != nil {
435+ return err
436+ }
437+ t .opts .FS = crashCloneFS
438+ if t .opts .WALFailover != nil {
439+ fmt .Println ("WALFAILOVER" )
440+ ccsmemFS := t .opts .WALFailover .Secondary .FS .(* vfs.MemFS )
441+ crashCloneSecondaryFS := ccsmemFS .CrashClone (vfs.CrashCloneCfg {UnsyncedDataPercent : 0 })
442+ t .testOpts .Opts .WALFailover .Secondary .FS = crashCloneSecondaryFS
443+ t .opts .WALFailover .Secondary .FS = crashCloneSecondaryFS
444+ fmt .Println ("[HERE] crashCloneFS" , crashCloneFS .String ())
445+ fmt .Println ("[HERE] crashCloneSecondaryFS" , crashCloneSecondaryFS .String ())
446+ }
447+ // Create a copy of options for the second DB.
448+ o := t .finalizeOptions ()
449+ t .dbs [dbID .slot ()- 1 ], err = pebble .Open (dir , & o )
450+ if err != nil {
451+ return err
452+ }
453+ return nil
454+ }
455+
385456func (t * Test ) saveInMemoryDataInternal () error {
386457 if rootFS := vfs .Root (t .opts .FS ); rootFS != vfs .Default {
387458 // t.opts.FS is an in-memory system; copy it to disk.
0 commit comments