@@ -2368,50 +2368,57 @@ func (d *BackupRestoreTestDriver) createBackupCollection(
23682368 return d .saveContents (ctx , l , rng , & collection , fingerprintAOST )
23692369}
23702370
2371- // deleteUserTableSST deletes an SST that covers a user table from the full
2372- // backup in a collection. This is used to test online restore recovery. We
2373- // delete from the full backup specifically to avoid deleting an SST from an
2374- // incremental that is skipped due to a compacted backup. This ensures that
2375- // deleting an SST will cause the download job to fail (assuming it hasn't
2376- // already been downloaded).
2377- // Note: We delete specifically a user-table SST as opposed to choosing a random
2378- // SST because the temporary system table SSTs may be GC'd before the download
2379- // phase attempts to download the SST. This would cause the download job to
2380- // succeed instead of failing as expected. See
2381- // https://github.com/cockroachdb/cockroach/issues/148408 for more details.
2382- // TODO (kev-cao): Investigate if blocking storage level compaction would
2383- // prevent GC of temporary system tables and allow us to delete a random SST
2384- // instead. Technically, it is still possible, but unlikely, for storage to
2385- // compact away a user-table SST before the download job attempts to download
2386- // it, which would result in false positives in the test.
2387- func (d * BackupRestoreTestDriver ) deleteUserTableSST (
2371+ // deleteSSTFromBackupLayers deletes one SST from each layer of a backup
2372+ // collection. This is used to test online restore recovery. Deleting an SST
2373+ // from each layer ensures that at least one SST required for a restore will be
2374+ // missing.
2375+ func (d * BackupRestoreTestDriver ) deleteSSTFromBackupLayers (
23882376 ctx context.Context , l * logger.Logger , db * gosql.DB , collection * backupCollection ,
23892377) error {
2390- var sstPath string
2391- var startPretty , endPretty string
2392- var sizeBytes , numRows int
2393- if err := db .QueryRowContext (
2378+ type sstInfo struct {
2379+ path , start , end string
2380+ bytes , rows int
2381+ }
2382+ rows , err := db .QueryContext (
23942383 ctx ,
2395- `SELECT path, start_pretty, end_pretty, size_bytes, rows FROM
2396- (
2397- SELECT row_number() OVER (), * FROM
2398- [SHOW BACKUP FILES FROM LATEST IN $1]
2384+ // We delete the last SST in each directory just to ensure that we always
2385+ // delete user data. If a system table data is deleted, it is possible for
2386+ // the temporary system table to be GC'd before the download phase detects
2387+ // the missing file.
2388+ `WITH with_dir AS (
2389+ SELECT *, regexp_replace(path, '/[^/]+\.sst$', '') AS dir
2390+ FROM [SHOW BACKUP FILES FROM LATEST IN $1]
2391+ )
2392+ SELECT path, start_pretty, end_pretty, size_bytes, rows
2393+ FROM (
2394+ SELECT *, row_number() OVER (PARTITION BY dir ORDER BY path DESC) AS rn
2395+ FROM with_dir
23992396 )
2400- WHERE backup_type = 'full'
2401- ORDER BY row_number DESC
2402- LIMIT 1` ,
2397+ WHERE rn = 1` ,
24032398 collection .uri (),
2404- ).Scan (& sstPath , & startPretty , & endPretty , & sizeBytes , & numRows ); err != nil {
2405- return errors .Wrapf (err , "failed to get SST path from %s" , collection .uri ())
2399+ )
2400+ if err != nil {
2401+ return errors .Wrapf (err , "failed to query for backup files in %q" , collection .uri ())
2402+ }
2403+ defer rows .Close ()
2404+ var ssts []sstInfo
2405+ for rows .Next () {
2406+ var sst sstInfo
2407+ if err := rows .Scan (& sst .path , & sst .start , & sst .end , & sst .bytes , & sst .rows ); err != nil {
2408+ return errors .Wrap (err , "error scanning SHOW BACKUP FILES row" )
2409+ }
2410+ ssts = append (ssts , sst )
2411+ }
2412+ if rows .Err () != nil {
2413+ return errors .Wrap (rows .Err (), "error iterating over SHOW BACKUP FILES" )
2414+ }
2415+ if len (ssts ) == 0 {
2416+ return errors .Newf ("unexpectedly found no SST files to delete in %q" , collection .uri ())
24062417 }
24072418 uri , err := url .Parse (collection .uri ())
24082419 if err != nil {
24092420 return errors .Wrapf (err , "failed to parse backup collection URI %q" , collection .uri ())
24102421 }
2411- l .Printf (
2412- "deleting sst %s at %s: covering %d bytes and %d rows in [%s, %s)" ,
2413- sstPath , uri , sizeBytes , numRows , startPretty , endPretty ,
2414- )
24152422 storage , err := cloud .ExternalStorageFromURI (
24162423 ctx ,
24172424 collection .uri (),
@@ -2427,13 +2434,22 @@ func (d *BackupRestoreTestDriver) deleteUserTableSST(
24272434 return errors .Wrapf (err , "failed to create external storage for %q" , collection .uri ())
24282435 }
24292436 defer storage .Close ()
2430- return errors .Wrapf (
2431- storage .Delete (ctx , sstPath ),
2432- "failed to delete sst %s from backup %s at %s" ,
2433- sstPath ,
2434- collection .name ,
2435- uri ,
2436- )
2437+ for _ , sst := range ssts {
2438+ l .Printf (
2439+ "deleting sst %s at %s: covering %d bytes and %d rows in [%s, %s)" ,
2440+ sst .path , uri , sst .bytes , sst .rows , sst .start , sst .end ,
2441+ )
2442+ if err := storage .Delete (ctx , sst .path ); err != nil {
2443+ return errors .Wrapf (
2444+ err ,
2445+ "failed to delete sst %s from backup %s at %s" ,
2446+ sst .path ,
2447+ collection .name ,
2448+ uri ,
2449+ )
2450+ }
2451+ }
2452+ return nil
24372453}
24382454
24392455// sentinelFilePath returns the path to the file that prevents job
0 commit comments