Skip to content

Commit 3c58666

Browse files
authored
go/cmd/vtbackup: add --restore-with-clone support to vtbackup (#19089)
Signed-off-by: Max Englander <max@planetscale.com>
1 parent cf1a804 commit 3c58666

File tree

5 files changed

+221
-39
lines changed

5 files changed

+221
-39
lines changed

go/cmd/vtbackup/cli/vtbackup.go

Lines changed: 45 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ var (
9393
initShard string
9494
concurrency = 4
9595
incrementalFromPos string
96+
restoreWithClone bool
9697

9798
// mysqlctld-like flags
9899
mysqlPort = 3306
@@ -157,7 +158,7 @@ When run periodically for each shard, vtbackup can ensure these configurable pol
157158
* Old backups for the shard are removed.
158159
159160
Whatever system launches vtbackup is responsible for the following:
160-
- Running vtbackup with similar flags that would be used for a vttablet and
161+
- Running vtbackup with similar flags that would be used for a vttablet and
161162
mysqlctld in the target shard to be backed up.
162163
163164
- Provisioning as much disk space for vtbackup as would be given to vttablet.
@@ -226,6 +227,7 @@ func init() {
226227
utils.SetFlagStringVar(Main.Flags(), &initShard, "init-shard", initShard, "(init parameter) shard to use for this tablet")
227228
Main.Flags().IntVar(&concurrency, "concurrency", concurrency, "(init restore parameter) how many concurrent files to restore at once")
228229
utils.SetFlagStringVar(Main.Flags(), &incrementalFromPos, "incremental-from-pos", incrementalFromPos, "Position, or name of backup from which to create an incremental backup. Default: empty. If given, then this backup becomes an incremental backup from given position or given backup. If value is 'auto', this backup will be taken from the last successful backup position.")
230+
Main.Flags().BoolVar(&restoreWithClone, "restore-with-clone", restoreWithClone, "(init parameter) will perform the restore phase with MySQL CLONE, requires either --clone-from-primary or --clone-from-tablet")
229231

230232
// mysqlctld-like flags
231233
utils.SetFlagIntVar(Main.Flags(), &mysqlPort, "mysql-port", mysqlPort, "MySQL port")
@@ -457,42 +459,49 @@ func takeBackup(ctx, backgroundCtx context.Context, topoServer *topo.Server, bac
457459
return nil
458460
}
459461

460-
phase.Set(phaseNameRestoreLastBackup, int64(1))
461-
defer phase.Set(phaseNameRestoreLastBackup, int64(0))
462-
backupDir := mysqlctl.GetBackupDir(initKeyspace, initShard)
463-
log.Infof("Restoring latest backup from directory %v", backupDir)
464-
restoreAt := time.Now()
465-
params := mysqlctl.RestoreParams{
466-
Cnf: mycnf,
467-
Mysqld: mysqld,
468-
Logger: logutil.NewConsoleLogger(),
469-
Concurrency: concurrency,
470-
HookExtraEnv: extraEnv,
471-
DeleteBeforeRestore: true,
472-
DbName: dbName,
473-
Keyspace: initKeyspace,
474-
Shard: initShard,
475-
Stats: backupstats.RestoreStats(),
476-
MysqlShutdownTimeout: mysqlShutdownTimeout,
477-
}
478-
backupManifest, err := mysqlctl.Restore(ctx, params)
479462
var restorePos replication.Position
480-
switch err {
481-
case nil:
482-
// if err is nil, we expect backupManifest to be non-nil
483-
restorePos = backupManifest.Position
484-
log.Infof("Successfully restored from backup at replication position %v", restorePos)
485-
case mysqlctl.ErrNoBackup:
486-
// There is no backup found, but we may be taking the initial backup of a shard
487-
if !allowFirstBackup {
488-
return errors.New("no backup found; not starting up empty since --initial_backup flag was not enabled")
489-
}
490-
restorePos = replication.Position{}
491-
default:
492-
return fmt.Errorf("can't restore from backup: %v", err)
493-
}
494-
deprecatedDurationByPhase.Set("RestoreLastBackup", int64(time.Since(restoreAt).Seconds()))
495-
phase.Set(phaseNameRestoreLastBackup, int64(0))
463+
if restoreWithClone {
464+
restorePos, err = mysqlctl.CloneFromDonor(ctx, topoServer, mysqld, initKeyspace, initShard)
465+
if err != nil {
466+
return fmt.Errorf("restore with clone failed: %v", err)
467+
}
468+
} else {
469+
phase.Set(phaseNameRestoreLastBackup, int64(1))
470+
defer phase.Set(phaseNameRestoreLastBackup, int64(0))
471+
backupDir := mysqlctl.GetBackupDir(initKeyspace, initShard)
472+
log.Infof("Restoring latest backup from directory %v", backupDir)
473+
restoreAt := time.Now()
474+
params := mysqlctl.RestoreParams{
475+
Cnf: mycnf,
476+
Mysqld: mysqld,
477+
Logger: logutil.NewConsoleLogger(),
478+
Concurrency: concurrency,
479+
HookExtraEnv: extraEnv,
480+
DeleteBeforeRestore: true,
481+
DbName: dbName,
482+
Keyspace: initKeyspace,
483+
Shard: initShard,
484+
Stats: backupstats.RestoreStats(),
485+
MysqlShutdownTimeout: mysqlShutdownTimeout,
486+
}
487+
backupManifest, err := mysqlctl.Restore(ctx, params)
488+
switch err {
489+
case nil:
490+
// if err is nil, we expect backupManifest to be non-nil
491+
restorePos = backupManifest.Position
492+
log.Infof("Successfully restored from backup at replication position %v", restorePos)
493+
case mysqlctl.ErrNoBackup:
494+
// There is no backup found, but we may be taking the initial backup of a shard
495+
if !allowFirstBackup {
496+
return errors.New("no backup found; not starting up empty since --initial_backup flag was not enabled")
497+
}
498+
restorePos = replication.Position{}
499+
default:
500+
return fmt.Errorf("can't restore from backup: %v", err)
501+
}
502+
deprecatedDurationByPhase.Set("RestoreLastBackup", int64(time.Since(restoreAt).Seconds()))
503+
phase.Set(phaseNameRestoreLastBackup, int64(0))
504+
}
496505

497506
// As of MySQL 8.0.21, you can disable redo logging using the ALTER INSTANCE
498507
// DISABLE INNODB REDO_LOG statement. This functionality is intended for

go/flags/endtoend/vtbackup.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ When run periodically for each shard, vtbackup can ensure these configurable pol
66
* Old backups for the shard are removed.
77

88
Whatever system launches vtbackup is responsible for the following:
9-
- Running vtbackup with similar flags that would be used for a vttablet and
9+
- Running vtbackup with similar flags that would be used for a vttablet and
1010
mysqlctld in the target shard to be backed up.
1111

1212
- Provisioning as much disk space for vtbackup as would be given to vttablet.
@@ -68,6 +68,8 @@ Flags:
6868
--builtinbackup-mysqld-timeout duration how long to wait for mysqld to shutdown at the start of the backup. (default 10m0s)
6969
--builtinbackup-progress duration how often to send progress updates when backing up large files. (default 5s)
7070
--ceph-backup-storage-config string Path to JSON config file for ceph backup storage. (default "ceph_backup_config.json")
71+
--clone-from-primary Clone data from the primary tablet in the shard using MySQL CLONE REMOTE instead of restoring from backup. Requires MySQL 8.0.17+. Mutually exclusive with --clone-from-tablet.
72+
--clone-from-tablet string Clone data from this tablet using MySQL CLONE REMOTE instead of restoring from backup (tablet alias, e.g., zone1-123). Requires MySQL 8.0.17+. Mutually exclusive with --clone-from-primary.
7173
--compression-engine-name string compressor engine used for compression. (default "pargzip")
7274
--compression-level int what level to pass to the compressor. (default 1)
7375
--concurrency int (init restore parameter) how many concurrent files to restore at once (default 4)
@@ -189,6 +191,7 @@ Flags:
189191
--mycnf-slow-log-path string mysql slow query log path
190192
--mycnf-socket-file string mysql socket file
191193
--mycnf-tmp-dir string mysql tmp directory
194+
--mysql-clone-enabled Enable MySQL CLONE plugin and user for backup/replica provisioning (requires MySQL 8.0.17+)
192195
--mysql-port int MySQL port (default 3306)
193196
--mysql-server-version string MySQL server version to advertise. (default "8.4.6-Vitess")
194197
--mysql-shell-backup-location string location where the backup will be stored
@@ -207,6 +210,7 @@ Flags:
207210
--purge-logs-interval duration how often try to remove old logs (default 1h0m0s)
208211
--remote-operation-timeout duration time to wait for a remote operation (default 15s)
209212
--restart-before-backup Perform a mysqld clean/full restart after applying binlogs, but before taking the backup. Only makes sense to work around xtrabackup bugs.
213+
--restore-with-clone (init parameter) will perform the restore phase with MySQL CLONE, requires either --clone-from-primary or --clone-from-tablet
210214
--s3-backup-aws-endpoint string endpoint of the S3 backend (region must be provided).
211215
--s3-backup-aws-min-partsize int Minimum part size to use, defaults to 5MiB but can be increased due to the dataset size. (default 5242880)
212216
--s3-backup-aws-region string AWS region to use. (default "us-east-1")
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
/*
2+
Copyright 2025 The Vitess Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package clone
18+
19+
import (
20+
"os"
21+
"testing"
22+
"time"
23+
24+
"github.com/stretchr/testify/assert"
25+
"github.com/stretchr/testify/require"
26+
27+
"vitess.io/vitess/go/test/endtoend/cluster"
28+
"vitess.io/vitess/go/vt/log"
29+
vtutils "vitess.io/vitess/go/vt/utils"
30+
)
31+
32+
func TestCloneBackup(t *testing.T) {
33+
t.Cleanup(func() { removeBackups(t) })
34+
t.Cleanup(tearDown)
35+
36+
// Initialize tablets first so we can connect to MySQL.
37+
for _, tablet := range []*cluster.Vttablet{primary, replica1} {
38+
err := localCluster.InitTablet(tablet, keyspaceName, shardName)
39+
require.NoError(t, err)
40+
err = tablet.VttabletProcess.Setup()
41+
require.NoError(t, err)
42+
}
43+
44+
// Initialize shard primary.
45+
err := localCluster.VtctldClientProcess.InitShardPrimary(keyspaceName, shardName, cell, primary.TabletUID)
46+
require.NoError(t, err)
47+
48+
// Now check if MySQL version supports clone (need vttablet running to query).
49+
if !mysqlVersionSupportsClone(t, primary) {
50+
t.Skip("Skipping clone test: MySQL version does not support CLONE (requires 8.0.17+)")
51+
}
52+
53+
// Check if clone plugin is available.
54+
if !clonePluginAvailable(t, primary) {
55+
t.Skip("Skipping clone test: clone plugin not available")
56+
}
57+
58+
// Set up clean test data (table may have data from previous tests).
59+
_, err = primary.VttabletProcess.QueryTablet(vtInsertTest, keyspaceName, true)
60+
require.NoError(t, err)
61+
_, err = primary.VttabletProcess.QueryTablet("TRUNCATE TABLE vt_insert_test", keyspaceName, true)
62+
require.NoError(t, err)
63+
_, err = primary.VttabletProcess.QueryTablet("insert into vt_insert_test (msg) values ('clone_test_1')", keyspaceName, true)
64+
require.NoError(t, err)
65+
_, err = primary.VttabletProcess.QueryTablet("insert into vt_insert_test (msg) values ('clone_test_2')", keyspaceName, true)
66+
require.NoError(t, err)
67+
68+
// Verify data exists on primary.
69+
cluster.VerifyRowsInTablet(t, primary, keyspaceName, 2)
70+
71+
// Wait for replica to catch up.
72+
time.Sleep(2 * time.Second)
73+
cluster.VerifyRowsInTablet(t, replica1, keyspaceName, 2)
74+
75+
// Take a backup using clone from primary.
76+
log.Infof("Starting vtbackup with --clone-from-primary")
77+
err = vtbackupWithClone(t)
78+
require.NoError(t, err)
79+
80+
// Verify a backup was created.
81+
backups := verifyBackupCount(t, shardKsName, 1)
82+
assert.NotEmpty(t, backups)
83+
84+
// Insert more data AFTER the backup was taken.
85+
_, err = primary.VttabletProcess.QueryTablet("insert into vt_insert_test (msg) values ('after_backup')", keyspaceName, true)
86+
require.NoError(t, err)
87+
cluster.VerifyRowsInTablet(t, primary, keyspaceName, 3)
88+
89+
// Now bring up replica2 and restore from the backup we just created.
90+
// This verifies the clone-based backup actually contains the data.
91+
log.Infof("Restoring replica2 from backup to verify clone worked")
92+
err = localCluster.InitTablet(replica2, keyspaceName, shardName)
93+
require.NoError(t, err)
94+
restore(t, replica2, "replica", "SERVING")
95+
96+
// Give replica2 time to catch up via replication.
97+
time.Sleep(5 * time.Second)
98+
99+
// Verify replica2 has ALL the data (2 rows from before backup + 1 from after).
100+
// The 2 pre-backup rows prove the clone-based backup worked.
101+
// The 3rd row proves replication is working after restore.
102+
cluster.VerifyRowsInTablet(t, replica2, keyspaceName, 3)
103+
log.Infof("Clone backup verification successful: replica2 has all data")
104+
}
105+
106+
func vtbackupWithClone(t *testing.T) error {
107+
mysqlSocket, err := os.CreateTemp("", "vtbackup_clone_test_mysql.sock")
108+
require.NoError(t, err)
109+
defer os.Remove(mysqlSocket.Name())
110+
111+
extraArgs := []string{
112+
"--allow_first_backup",
113+
"--db-credentials-file", dbCredentialFile,
114+
"--mysql-clone-enabled",
115+
vtutils.GetFlagVariantForTests("--mysql-socket"), mysqlSocket.Name(),
116+
// Clone from primary instead of restoring from backup.
117+
"--restore-with-clone",
118+
"--clone-from-primary",
119+
// Clone credentials - use vt_clone user which is created with @'%' host
120+
// and BACKUP_ADMIN privilege in init_db.sql (no password).
121+
"--db-clone-user", "vt_clone",
122+
"--db-clone-password", "",
123+
"--db-clone-use-ssl=false",
124+
}
125+
126+
log.Infof("Starting vtbackup with clone args: %v", extraArgs)
127+
return localCluster.StartVtbackup(newInitDBFile, false, keyspaceName, shardName, cell, extraArgs...)
128+
}
129+
130+
func verifyBackupCount(t *testing.T, shardKsName string, expected int) []string {
131+
backups, err := localCluster.VtctldClientProcess.ExecuteCommandWithOutput("GetBackups", shardKsName)
132+
require.NoError(t, err)
133+
134+
var result []string
135+
for _, line := range splitLines(backups) {
136+
if line != "" {
137+
result = append(result, line)
138+
}
139+
}
140+
assert.Equalf(t, expected, len(result), "expected %d backups, got %d", expected, len(result))
141+
return result
142+
}
143+
144+
func restore(t *testing.T, tablet *cluster.Vttablet, tabletType string, waitForState string) {
145+
// Start tablet with restore enabled. MySQL is already running from TestMain.
146+
log.Infof("restoring tablet %s", time.Now())
147+
tablet.VttabletProcess.ExtraArgs = []string{"--db-credentials-file", dbCredentialFile}
148+
tablet.VttabletProcess.TabletType = tabletType
149+
tablet.VttabletProcess.ServingStatus = waitForState
150+
tablet.VttabletProcess.SupportsBackup = true
151+
err := tablet.VttabletProcess.Setup()
152+
require.NoError(t, err)
153+
}
154+
155+
func tearDown() {
156+
for _, tablet := range []*cluster.Vttablet{primary, replica1, replica2} {
157+
if tablet != nil && tablet.VttabletProcess != nil {
158+
_ = tablet.VttabletProcess.TearDown()
159+
}
160+
if tablet != nil {
161+
_ = localCluster.VtctldClientProcess.ExecuteCommand("DeleteTablets", "--allow-primary", tablet.Alias)
162+
}
163+
}
164+
}

go/vt/mysqlctl/clone.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,7 @@ var (
4343
)
4444

4545
func init() {
46-
// TODO: enable these flags for vtbackup.
47-
for _, cmd := range []string{"vttablet" /*, "vtbackup"*/} {
46+
for _, cmd := range []string{"vttablet", "vtbackup"} {
4847
servenv.OnParseFor(cmd, registerCloneFlags)
4948
}
5049
}

go/vt/mysqlctl/mysqld.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,13 +138,19 @@ func init() {
138138
for _, cmd := range []string{"mysqlctl", "mysqlctld", "vtcombo", "vttablet", "vttestserver"} {
139139
servenv.OnParseFor(cmd, registerPoolFlags)
140140
}
141+
for _, cmd := range []string{"mysqlctl", "mysqlctld", "vtcombo", "vttablet", "vttestserver", "vtbackup"} {
142+
servenv.OnParseFor(cmd, registerMySQLDCloneFlags)
143+
}
141144
}
142145

143146
func registerMySQLDFlags(fs *pflag.FlagSet) {
144147
utils.SetFlagDurationVar(fs, &PoolDynamicHostnameResolution, "pool-hostname-resolve-interval", PoolDynamicHostnameResolution, "if set force an update to all hostnames and reconnect if changed, defaults to 0 (disabled)")
145148
utils.SetFlagStringVar(fs, &mycnfTemplateFile, "mysqlctl-mycnf-template", mycnfTemplateFile, "template file to use for generating the my.cnf file during server init")
146149
utils.SetFlagStringVar(fs, &socketFile, "mysqlctl-socket", socketFile, "socket file to use for remote mysqlctl actions (empty for local actions)")
147150
utils.SetFlagDurationVar(fs, &replicationConnectRetry, "replication-connect-retry", replicationConnectRetry, "how long to wait in between replica reconnect attempts. Only precise to the second.")
151+
}
152+
153+
func registerMySQLDCloneFlags(fs *pflag.FlagSet) {
148154
utils.SetFlagBoolVar(fs, &mysqlCloneEnabled, "mysql-clone-enabled", mysqlCloneEnabled, "Enable MySQL CLONE plugin and user for backup/replica provisioning (requires MySQL 8.0.17+)")
149155
}
150156

0 commit comments

Comments
 (0)