Skip to content

Commit 6389dc3

Browse files
committed
backup: teach ResolveBackupManifests to use the backup index
This commit teaches ResolveBackupManifests to use the backup index, which prevents unnecessarily reads of backup manifests that are elided during restore. Since backups that are not indexed are still considered restorable in this version, the legacy path of loading all manifests is still preserved and can be removed in 26.2. Epic: CRDB-47942 Fixes: #150330, #150331, #150332 Release note: None
1 parent 8a95227 commit 6389dc3

15 files changed

+612
-144
lines changed

pkg/backup/backup_job.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,7 @@ func backup(
458458
execCtx.User(),
459459
execCtx.ExecCfg().DistSQLSrv.ExternalStorageFromURI,
460460
details,
461+
backupManifest.RevisionStartTime,
461462
); err != nil {
462463
return roachpb.RowCount{}, 0, errors.Wrapf(err, "writing backup index metadata")
463464
}

pkg/backup/backup_test.go

Lines changed: 41 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -4057,6 +4057,10 @@ func TestNonLinearChain(t *testing.T) {
40574057

40584058
sqlDB := sqlutils.MakeSQLRunner(tc.Conns[0])
40594059

4060+
// This ensures a determenistic behavior to the relative number of files we
4061+
// open during a restore when skipping a backup.
4062+
sqlDB.Exec(t, `SET CLUSTER SETTING backup.index.read.enabled = true`)
4063+
40604064
// Make a table with a row in it and make a full backup of it.
40614065
sqlDB.Exec(t, `CREATE TABLE t (a INT PRIMARY KEY)`)
40624066
sqlDB.Exec(t, `INSERT INTO t VALUES (0)`)
@@ -4110,15 +4114,14 @@ func TestNonLinearChain(t *testing.T) {
41104114

41114115
// Restore the same thing -- t2 -- we did before but now with the extra inc
41124116
// spur hanging out in the chain. This should produce the same result, and we
4113-
// would like it to only open two extra file2 to do so -- the manifest that
4114-
// includes the timestamps that then show it is not needed by the restore, and
4115-
// the checksum for said manifest.
4117+
// would like it to only open one extra file to do so, just the index of the
4118+
// incremental to show that it is not needed by the restore.
41164119
sqlDB.Exec(t, `DROP TABLE t`)
41174120
openedBefore = tc.Servers[0].MustGetSQLCounter("cloud.readers_opened")
41184121
sqlDB.Exec(t, `RESTORE TABLE defaultdb.t FROM LATEST IN $1`, localFoo)
41194122
sqlDB.CheckQueryResults(t, `SELECT * FROM t`, [][]string{{"0"}, {"1"}, {"2"}})
41204123
openedB := tc.Servers[0].MustGetSQLCounter("cloud.readers_opened") - openedBefore
4121-
require.Equal(t, openedA+2, openedB)
4124+
require.Equal(t, openedA+1, openedB)
41224125

41234126
// Finally, make sure we can restore from the tip of the spur, not just the
41244127
// tip of the chain.
@@ -6676,6 +6679,9 @@ INSERT INTO baz.bar VALUES (110, 'a'), (210, 'b'), (310, 'c'), (410, 'd'), (510,
66766679
func TestBackupRestoreInsideTenant(t *testing.T) {
66776680
defer leaktest.AfterTest(t)()
66786681
defer log.Scope(t).Close(t)
6682+
6683+
skip.UnderRace(t, "runs slow under race, ~10+ mins")
6684+
66796685
const numAccounts = 1
66806686

66816687
makeTenant := func(srv serverutils.TestServerInterface, tenant uint64) (*sqlutils.SQLRunner, func()) {
@@ -6718,61 +6724,56 @@ func TestBackupRestoreInsideTenant(t *testing.T) {
67186724
defer cleanupT11C2()
67196725

67206726
t.Run("tenant-backup", func(t *testing.T) {
6721-
// This test uses this mock HTTP server to pass the backup files between tenants.
6722-
httpAddr, httpServerCleanup := makeInsecureHTTPServer(t)
6723-
defer httpServerCleanup()
6727+
const collectionURI = "nodelocal://1/tenant-backup"
67246728

6725-
tenant10.Exec(t, `BACKUP INTO $1`, httpAddr)
6729+
tenant10.Exec(t, `BACKUP INTO $1`, collectionURI)
67266730

67276731
t.Run("cluster-restore", func(t *testing.T) {
67286732
t.Run("into-same-tenant-id", func(t *testing.T) {
6729-
tenant10C2.Exec(t, `RESTORE FROM LATEST IN $1`, httpAddr)
6733+
tenant10C2.Exec(t, `RESTORE FROM LATEST IN $1`, collectionURI)
67306734
tenant10C2.CheckQueryResults(t, `SELECT * FROM foo.bar`, tenant10.QueryStr(t, `SELECT * FROM foo.bar`))
67316735
})
67326736
t.Run("into-different-tenant-id", func(t *testing.T) {
6733-
tenant11C2.Exec(t, `RESTORE FROM LATEST IN $1`, httpAddr)
6737+
tenant11C2.Exec(t, `RESTORE FROM LATEST IN $1`, collectionURI)
67346738
tenant11C2.CheckQueryResults(t, `SELECT * FROM foo.bar`, tenant10.QueryStr(t, `SELECT * FROM foo.bar`))
67356739
})
67366740
t.Run("into-system-tenant-id", func(t *testing.T) {
6737-
systemDB2.Exec(t, `RESTORE FROM LATEST IN $1`, httpAddr)
6741+
systemDB2.Exec(t, `RESTORE FROM LATEST IN $1`, collectionURI)
67386742
systemDB2.CheckQueryResults(t, `SELECT * FROM foo.bar`, tenant10.QueryStr(t, `SELECT * FROM foo.bar`))
67396743
})
67406744
})
67416745

67426746
t.Run("database-restore", func(t *testing.T) {
67436747
t.Run("into-same-tenant-id", func(t *testing.T) {
67446748
tenant10.Exec(t, `CREATE DATABASE foo2`)
6745-
tenant10.Exec(t, `RESTORE TABLE foo.bar FROM LATEST IN $1 WITH into_db='foo2'`, httpAddr)
6749+
tenant10.Exec(t, `RESTORE TABLE foo.bar FROM LATEST IN $1 WITH into_db='foo2'`, collectionURI)
67466750
tenant10.CheckQueryResults(t, `SELECT * FROM foo2.bar`, tenant10.QueryStr(t, `SELECT * FROM foo.bar`))
67476751
})
67486752
t.Run("into-different-tenant-id", func(t *testing.T) {
67496753
tenant11.Exec(t, `CREATE DATABASE foo`)
6750-
tenant11.Exec(t, `RESTORE TABLE foo.bar FROM LATEST IN $1`, httpAddr)
6754+
tenant11.Exec(t, `RESTORE TABLE foo.bar FROM LATEST IN $1`, collectionURI)
67516755
tenant11.CheckQueryResults(t, `SELECT * FROM foo.bar`, tenant10.QueryStr(t, `SELECT * FROM foo.bar`))
67526756
})
67536757
t.Run("into-system-tenant-id", func(t *testing.T) {
67546758
systemDB.Exec(t, `CREATE DATABASE foo2`)
6755-
systemDB.Exec(t, `RESTORE TABLE foo.bar FROM LATEST IN $1 WITH into_db='foo2'`, httpAddr)
6759+
systemDB.Exec(t, `RESTORE TABLE foo.bar FROM LATEST IN $1 WITH into_db='foo2'`, collectionURI)
67566760
systemDB.CheckQueryResults(t, `SELECT * FROM foo2.bar`, tenant10.QueryStr(t, `SELECT * FROM foo.bar`))
67576761
})
67586762
})
67596763
})
67606764

67616765
t.Run("system-backup", func(t *testing.T) {
6762-
// This test uses this mock HTTP server to pass the backup files between tenants.
6763-
httpAddr, httpServerCleanup := makeInsecureHTTPServer(t)
6764-
defer httpServerCleanup()
6766+
const collectionURI = "nodelocal://1/system-backup"
67656767

6766-
systemDB.Exec(t, `BACKUP INTO $1 WITH include_all_virtual_clusters`, httpAddr)
6768+
systemDB.Exec(t, `BACKUP INTO $1 WITH include_all_virtual_clusters`, collectionURI)
67676769

67686770
tenant20C2, cleanupT20C2 := makeTenant(srv2, 20)
67696771
defer cleanupT20C2()
67706772

67716773
t.Run("cluster-restore", func(t *testing.T) {
67726774
// Now restore a cluster backup taken from a system tenant that
67736775
// hasn't created any tenants.
6774-
httpAddrEmpty, cleanupEmptyHTTPServer := makeInsecureHTTPServer(t)
6775-
defer cleanupEmptyHTTPServer()
6776+
const emptyCollectionURI = "nodelocal://1/empty-system-backup"
67766777

67776778
_, emptySystemDB, cleanupEmptyCluster := backupRestoreTestSetupEmpty(t, singleNode,
67786779
dir, InitManualReplication, base.TestClusterArgs{
@@ -6782,13 +6783,13 @@ func TestBackupRestoreInsideTenant(t *testing.T) {
67826783
})
67836784
defer cleanupEmptyCluster()
67846785

6785-
emptySystemDB.Exec(t, `BACKUP INTO $1`, httpAddrEmpty)
6786-
tenant20C2.Exec(t, `RESTORE FROM LATEST IN $1`, httpAddrEmpty)
6786+
emptySystemDB.Exec(t, `BACKUP INTO $1`, emptyCollectionURI)
6787+
tenant20C2.Exec(t, `RESTORE FROM LATEST IN $1`, emptyCollectionURI)
67876788
})
67886789

67896790
t.Run("database-restore-into-tenant", func(t *testing.T) {
67906791
tenant10.Exec(t, `CREATE DATABASE data`)
6791-
tenant10.Exec(t, `RESTORE TABLE data.bank FROM LATEST IN $1`, httpAddr)
6792+
tenant10.Exec(t, `RESTORE TABLE data.bank FROM LATEST IN $1`, collectionURI)
67926793
systemDB.CheckQueryResults(t, `SELECT * FROM data.bank`, tenant10.QueryStr(t, `SELECT * FROM data.bank`))
67936794
})
67946795

@@ -6814,9 +6815,8 @@ func TestBackupRestoreTenantSettings(t *testing.T) {
68146815
_ = securitytest.EmbeddedTenantIDs()
68156816

68166817
systemDB.Exec(t, `ALTER TENANT ALL SET CLUSTER SETTING sql.notices.enabled = false`)
6817-
backup2HttpAddr, backup2Cleanup := makeInsecureHTTPServer(t)
6818-
defer backup2Cleanup()
6819-
systemDB.Exec(t, `BACKUP INTO $1`, backup2HttpAddr)
6818+
const collectionURI = "nodelocal://1/backup"
6819+
systemDB.Exec(t, `BACKUP INTO $1`, collectionURI)
68206820

68216821
_, systemDB2, cleanupDB2 := backupRestoreTestSetupEmpty(t, singleNode, dir, InitManualReplication, base.TestClusterArgs{
68226822
ServerArgs: base.TestServerArgs{
@@ -6825,7 +6825,7 @@ func TestBackupRestoreTenantSettings(t *testing.T) {
68256825
})
68266826
defer cleanupDB2()
68276827
t.Run("cluster-restore-into-cluster-with-tenant-settings-succeeds", func(t *testing.T) {
6828-
systemDB2.Exec(t, `RESTORE FROM LATEST IN $1`, backup2HttpAddr)
6828+
systemDB2.Exec(t, `RESTORE FROM LATEST IN $1`, collectionURI)
68296829
systemDB2.CheckQueryResults(t, `SELECT * FROM system.tenant_settings`, systemDB.QueryStr(t, `SELECT * FROM system.tenant_settings`))
68306830
})
68316831
}
@@ -6892,60 +6892,55 @@ func TestBackupRestoreInsideMultiPodTenant(t *testing.T) {
68926892
tenant10[0].Exec(t, `CREATE DATABASE foo; CREATE TABLE foo.bar(i int primary key); INSERT INTO foo.bar VALUES (110), (210), (310)`)
68936893

68946894
t.Run("tenant-backup", func(t *testing.T) {
6895-
// This test uses this mock HTTP server to pass the backup files between tenants.
6896-
httpAddr, httpServerCleanup := makeInsecureHTTPServer(t)
6897-
defer httpServerCleanup()
6895+
const collectionURI = "nodelocal://1/tenant-backup"
68986896

6899-
tenant10[0].Exec(t, `BACKUP INTO $1`, httpAddr)
6897+
tenant10[0].Exec(t, `BACKUP INTO $1`, collectionURI)
69006898

69016899
t.Run("cluster-restore", func(t *testing.T) {
69026900
t.Run("into-same-tenant-id", func(t *testing.T) {
6903-
tenant10C2[0].Exec(t, `RESTORE FROM LATEST IN $1`, httpAddr)
6901+
tenant10C2[0].Exec(t, `RESTORE FROM LATEST IN $1`, collectionURI)
69046902
tenant10C2[0].CheckQueryResults(t, `SELECT * FROM foo.bar`, tenant10[0].QueryStr(t, `SELECT * FROM foo.bar`))
69056903
})
69066904
t.Run("into-different-tenant-id", func(t *testing.T) {
6907-
tenant11C2.Exec(t, `RESTORE FROM LATEST IN $1`, httpAddr)
6905+
tenant11C2.Exec(t, `RESTORE FROM LATEST IN $1`, collectionURI)
69086906
tenant11C2.CheckQueryResults(t, `SELECT * FROM foo.bar`, tenant10[0].QueryStr(t, `SELECT * FROM foo.bar`))
69096907
})
69106908
t.Run("into-system-tenant-id", func(t *testing.T) {
6911-
systemDB2.Exec(t, `RESTORE FROM LATEST IN $1`, httpAddr)
6909+
systemDB2.Exec(t, `RESTORE FROM LATEST IN $1`, collectionURI)
69126910
})
69136911
})
69146912

69156913
t.Run("database-restore", func(t *testing.T) {
69166914
t.Run("into-same-tenant-id", func(t *testing.T) {
69176915
tenant10[0].Exec(t, `CREATE DATABASE foo2`)
6918-
tenant10[0].Exec(t, `RESTORE TABLE foo.bar FROM LATEST IN $1 WITH into_db='foo2'`, httpAddr)
6916+
tenant10[0].Exec(t, `RESTORE TABLE foo.bar FROM LATEST IN $1 WITH into_db='foo2'`, collectionURI)
69196917
tenant10[0].CheckQueryResults(t, `SELECT * FROM foo2.bar`, tenant10[0].QueryStr(t, `SELECT * FROM foo.bar`))
69206918
})
69216919
t.Run("into-different-tenant-id", func(t *testing.T) {
69226920
tenant11[0].Exec(t, `CREATE DATABASE foo`)
6923-
tenant11[0].Exec(t, `RESTORE TABLE foo.bar FROM LATEST IN $1`, httpAddr)
6921+
tenant11[0].Exec(t, `RESTORE TABLE foo.bar FROM LATEST IN $1`, collectionURI)
69246922
tenant11[0].CheckQueryResults(t, `SELECT * FROM foo.bar`, tenant10[0].QueryStr(t, `SELECT * FROM foo.bar`))
69256923
})
69266924
t.Run("into-system-tenant-id", func(t *testing.T) {
69276925
systemDB.Exec(t, `CREATE DATABASE foo2`)
6928-
systemDB.Exec(t, `RESTORE TABLE foo.bar FROM LATEST IN $1 WITH into_db='foo2'`, httpAddr)
6926+
systemDB.Exec(t, `RESTORE TABLE foo.bar FROM LATEST IN $1 WITH into_db='foo2'`, collectionURI)
69296927
systemDB.CheckQueryResults(t, `SELECT * FROM foo2.bar`, tenant10[0].QueryStr(t, `SELECT * FROM foo.bar`))
69306928
})
69316929
})
69326930
})
69336931

69346932
t.Run("system-backup", func(t *testing.T) {
6935-
// This test uses this mock HTTP server to pass the backup files between tenants.
6936-
httpAddr, httpServerCleanup := makeInsecureHTTPServer(t)
6937-
defer httpServerCleanup()
6933+
const collectionURI = "nodelocal://1/system-backup"
69386934

6939-
systemDB.Exec(t, `BACKUP INTO $1 WITH include_all_virtual_clusters`, httpAddr)
6935+
systemDB.Exec(t, `BACKUP INTO $1 WITH include_all_virtual_clusters`, collectionURI)
69406936

69416937
tenant20C2, cleanupT20C2 := makeTenant(srv2, 20, false)
69426938
defer cleanupT20C2()
69436939

69446940
t.Run("cluster-restore", func(t *testing.T) {
69456941
// Now restore a cluster backup taken from a system tenant that
69466942
// hasn't created any tenants.
6947-
httpAddrEmpty, cleanupEmptyHTTPServer := makeInsecureHTTPServer(t)
6948-
defer cleanupEmptyHTTPServer()
6943+
const emptyCollectionURI = "nodelocal://1/empty-system-backup"
69496944

69506945
_, emptySystemDB, cleanupEmptyCluster := backupRestoreTestSetupEmpty(t, singleNode,
69516946
dir, InitManualReplication, base.TestClusterArgs{
@@ -6955,13 +6950,13 @@ func TestBackupRestoreInsideMultiPodTenant(t *testing.T) {
69556950
})
69566951
defer cleanupEmptyCluster()
69576952

6958-
emptySystemDB.Exec(t, `BACKUP INTO $1`, httpAddrEmpty)
6959-
tenant20C2.Exec(t, `RESTORE FROM LATEST IN $1`, httpAddrEmpty)
6953+
emptySystemDB.Exec(t, `BACKUP INTO $1`, emptyCollectionURI)
6954+
tenant20C2.Exec(t, `RESTORE FROM LATEST IN $1`, emptyCollectionURI)
69606955
})
69616956

69626957
t.Run("database-restore-into-tenant", func(t *testing.T) {
69636958
tenant10[0].Exec(t, `CREATE DATABASE data`)
6964-
tenant10[0].Exec(t, `RESTORE TABLE data.bank FROM LATEST IN $1`, httpAddr)
6959+
tenant10[0].Exec(t, `RESTORE TABLE data.bank FROM LATEST IN $1`, collectionURI)
69656960
systemDB.CheckQueryResults(t, `SELECT * FROM data.bank`, tenant10[0].QueryStr(t, `SELECT * FROM data.bank`))
69666961
})
69676962
})

pkg/backup/backupdest/BUILD.bazel

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,14 @@ go_library(
3131
"//pkg/util/encoding",
3232
"//pkg/util/hlc",
3333
"//pkg/util/ioctx",
34+
"//pkg/util/log",
3435
"//pkg/util/metamorphic",
3536
"//pkg/util/mon",
3637
"//pkg/util/protoutil",
3738
"//pkg/util/timeutil",
3839
"//pkg/util/tracing",
3940
"@com_github_cockroachdb_errors//:errors",
41+
"@org_golang_x_sync//errgroup",
4042
],
4143
)
4244

@@ -73,6 +75,7 @@ go_test(
7375
"//pkg/settings/cluster",
7476
"//pkg/sql",
7577
"//pkg/testutils",
78+
"//pkg/testutils/jobutils",
7679
"//pkg/testutils/serverutils",
7780
"//pkg/testutils/testcluster",
7881
"//pkg/util",

0 commit comments

Comments
 (0)