
Commit 78e265e

roachtest: add benchmark roachtests for online restore
Epic: CRDB-37550
Release note: None
1 parent 6a5fe35 commit 78e265e

4 files changed: +153 −62 lines changed

pkg/cmd/roachtest/spec/cluster_spec.go

Lines changed: 9 additions & 7 deletions
@@ -138,7 +138,9 @@ type ClusterSpec struct {
 		MachineType string
 		// VolumeThroughput is the min provisioned EBS volume throughput.
 		VolumeThroughput int
-		Zones string
+		// VolumeIOPS is the provisioned EBS volume IOPS.
+		VolumeIOPS int
+		Zones string
 	} `cloud:"aws"`

 	// Azure-specific arguments. These values apply only on clusters instantiated on Azure.
@@ -220,17 +222,17 @@ func awsMachineSupportsSSD(machineType string) bool {
 }

 func getAWSOpts(
-	machineType string, volumeSize, ebsThroughput int, localSSD bool, useSpotVMs bool,
+	machineType string, volumeSize, ebsThroughput int, ebsIOPS int, localSSD bool, useSpotVMs bool,
 ) vm.ProviderOpts {
 	opts := aws.DefaultProviderOpts()
 	if volumeSize != 0 {
 		opts.DefaultEBSVolume.Disk.VolumeSize = volumeSize
 	}
+	if ebsIOPS != 0 {
+		opts.DefaultEBSVolume.Disk.IOPs = ebsIOPS
+	}
 	if ebsThroughput != 0 {
 		opts.DefaultEBSVolume.Disk.Throughput = ebsThroughput
-		if opts.DefaultEBSVolume.Disk.IOPs < opts.DefaultEBSVolume.Disk.Throughput*4 {
-			opts.DefaultEBSVolume.Disk.IOPs = opts.DefaultEBSVolume.Disk.Throughput * 6
-		}
 	}
 	if localSSD {
 		opts.SSDMachineType = machineType
@@ -518,10 +520,10 @@ func (s *ClusterSpec) RoachprodOpts(
 	var workloadProviderOpts vm.ProviderOpts
 	switch cloud {
 	case AWS:
-		providerOpts = getAWSOpts(machineType, s.VolumeSize, s.AWS.VolumeThroughput,
+		providerOpts = getAWSOpts(machineType, s.VolumeSize, s.AWS.VolumeThroughput, s.AWS.VolumeIOPS,
 			createVMOpts.SSDOpts.UseLocalSSD, s.UseSpotVMs)
 		workloadProviderOpts = getAWSOpts(workloadMachineType, s.VolumeSize, s.AWS.VolumeThroughput,
-			createVMOpts.SSDOpts.UseLocalSSD, s.UseSpotVMs)
+			s.AWS.VolumeIOPS, createVMOpts.SSDOpts.UseLocalSSD, s.UseSpotVMs)
 	case GCE:
 		providerOpts = getGCEOpts(machineType, s.VolumeSize, ssdCount,
 			createVMOpts.SSDOpts.UseLocalSSD, s.RAID0, s.TerminateOnMigration,
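Note: getAWSOpts previously derived EBS IOPS from the provisioned throughput; after this change the IOPS value is plumbed through explicitly from the cluster spec. A minimal sketch of the new call shape, assuming a standalone harness; the machine type and sizes below are illustrative, not part of the commit:

cs := ClusterSpec{}
cs.AWS.VolumeThroughput = 800 // MiB/s
cs.AWS.VolumeIOPS = 15_000    // explicit provisioned IOPS, no longer derived from throughput

// volumeSize in GiB; local SSD and spot VMs disabled for the sketch.
opts := getAWSOpts("m6i.8xlarge", 1500, cs.AWS.VolumeThroughput, cs.AWS.VolumeIOPS,
	false /* localSSD */, false /* useSpotVMs */)
_ = opts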

pkg/cmd/roachtest/spec/option.go

Lines changed: 8 additions & 0 deletions
@@ -282,6 +282,14 @@ func AWSVolumeThroughput(throughput int) Option {
 	}
 }

+// AWSVolumeIOPS sets the provisioned IOPS for EBS volumes when the cluster is
+// on AWS.
+func AWSVolumeIOPS(iops int) Option {
+	return func(spec *ClusterSpec) {
+		spec.AWS.VolumeIOPS = iops
+	}
+}
+
 // AWSZones is a node option which requests Geo-distributed nodes; only applies
 // when the test runs on AWS.
 //
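The new option composes with the existing AWSVolumeThroughput option. A minimal sketch, assuming Option is a plain func(*ClusterSpec) as the constructors above suggest; only the two option constructors come from this package, the values are illustrative:

cs := &ClusterSpec{}
for _, opt := range []Option{
	AWSVolumeThroughput(800), // MiB/s, pre-existing option shown in context above
	AWSVolumeIOPS(15_000),    // new option added in this commit
} {
	opt(cs)
}
// cs.AWS.VolumeThroughput == 800, cs.AWS.VolumeIOPS == 15000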

pkg/cmd/roachtest/tests/online_restore.go

Lines changed: 103 additions & 43 deletions
@@ -58,6 +58,9 @@ type onlineRestoreSpecs struct {
 	linkPhaseTimeout time.Duration
 	// downloadPhaseTimeout is the timeout for the download phase of the restore, if set.
 	downloadPhaseTimeout time.Duration
+	// compactionConcurrency overrides the default
+	// storage.max_download_compaction_concurrency cluster setting.
+	compactionConcurrency int
 }

 // restoreWorkload describes the workload that will run during the download
@@ -148,24 +151,67 @@ func registerOnlineRestorePerf(r registry.Registry) {
 			linkPhaseTimeout: 45 * time.Second, // typically takes 20 seconds
 			downloadPhaseTimeout: 20 * time.Minute, // typically takes 10 minutes.
 		},
+		// OR Benchmarking tests
+		// See benchmark plan here: https://docs.google.com/spreadsheets/d/1uPcQ1YPohXKxwFxWWDUMJrYLKQOuqSZKVrI8SJam5n8
 		{
-			// 2TB tpcc Online Restore
 			restoreSpecs: restoreSpecs{
-				hardware: makeHardwareSpecs(hardwareSpecs{nodes: 10, volumeSize: 1500, workloadNode: true}),
+				hardware: makeHardwareSpecs(hardwareSpecs{
+					nodes: 10, volumeSize: 1500, workloadNode: true,
+				}),
 				backup: backupSpecs{
 					cloud: spec.GCE,
 					fixture: MediumFixture,
 				},
-				fullBackupOnly: true,
 				timeout: 3 * time.Hour,
 				suites: registry.Suites(registry.Nightly),
+				fullBackupOnly: true,
 			},
 			workload: tpccRestore{
 				opts: tpccRunOpts{waitFraction: 0, workers: 100, maxRate: 1000},
 			},
 			linkPhaseTimeout: 10 * time.Minute, // typically takes 5 minutes
 			downloadPhaseTimeout: 4 * time.Hour, // typically takes 2 hours.
 		},
+		{
+			restoreSpecs: restoreSpecs{
+				hardware: makeHardwareSpecs(hardwareSpecs{
+					nodes: 10, volumeSize: 1500, workloadNode: true,
+				}),
+				backup: backupSpecs{
+					cloud: spec.GCE,
+					fixture: MediumFixture,
+				},
+				timeout: 3 * time.Hour,
+				suites: registry.Suites(registry.Nightly),
+				fullBackupOnly: true,
+			},
+			workload: tpccRestore{
+				opts: tpccRunOpts{waitFraction: 0, workers: 100, maxRate: 1000},
+			},
+			linkPhaseTimeout: 10 * time.Minute,
+			downloadPhaseTimeout: 4 * time.Hour,
+			compactionConcurrency: 32,
+		},
+		{
+			restoreSpecs: restoreSpecs{
+				hardware: makeHardwareSpecs(hardwareSpecs{
+					nodes: 10, volumeSize: 1500, workloadNode: true, ebsIOPS: 15_000, ebsThroughput: 800,
+				}),
+				backup: backupSpecs{
+					cloud: spec.AWS,
+					fixture: MediumFixture,
+				},
+				timeout: 3 * time.Hour,
+				suites: registry.Suites(registry.Nightly),
+				fullBackupOnly: true,
+			},
+			workload: tpccRestore{
+				opts: tpccRunOpts{waitFraction: 0, workers: 100, maxRate: 1000},
+			},
+			linkPhaseTimeout: 10 * time.Minute,
+			downloadPhaseTimeout: 4 * time.Hour,
+			compactionConcurrency: 32,
+		},
 	} {
 		for _, runOnline := range []bool{true, false} {
 			for _, useWorkarounds := range []bool{true, false} {
@@ -174,6 +220,26 @@ func registerOnlineRestorePerf(r registry.Registry) {
 				runOnline := runOnline
 				runWorkload := runWorkload
 				useWorkarounds := useWorkarounds
+				clusterSettings := []string{
+					// TODO(dt): what's the right value for this? How do we tune this
+					// on the fly automatically during the restore instead of by-hand?
+					// Context: We expect many operations to take longer than usual
+					// when some or all of the data they touch is remote. For now this
+					// is being blanket set to 1h manually, and a user's run-book
+					// would need to do this by hand before an online restore and
+					// reset it manually after, but ideally the queues would be aware
+					// of remote-ness when they pick their own timeouts and pick
+					// accordingly.
+					"kv.queue.process.guaranteed_time_budget='1h'",
+					// TODO(dt): AC appears periodically reduce the workload to 0 QPS
+					// during the download phase (sudden jumps from 0 to 2k qps to 0).
+					// Disable for now until we figure out how to smooth this out.
+					"admission.disk_bandwidth_tokens.elastic.enabled=false",
+					"admission.kv.enabled=false",
+					"admission.sql_kv_response.enabled=false",
+					"kv.consistency_queue.enabled=false",
+					"kv.range_merge.skip_external_bytes.enabled=true",
+				}

 				if runOnline {
 					sp.namePrefix = "online/"
@@ -187,10 +253,24 @@ func registerOnlineRestorePerf(r registry.Registry) {

 				sp.namePrefix = sp.namePrefix + fmt.Sprintf("workload=%t", runWorkload)
 				if !useWorkarounds {
+					clusterSettings = []string{}
 					sp.skip = "used for ad hoc experiments"
 					sp.namePrefix = sp.namePrefix + fmt.Sprintf("/workarounds=%t", useWorkarounds)
 				}

+				if sp.compactionConcurrency != 0 {
+					sp.namePrefix = sp.namePrefix + fmt.Sprintf(
+						"/compaction-concurrency=%d", sp.compactionConcurrency,
+					)
+					clusterSettings = append(
+						clusterSettings,
+						fmt.Sprintf(
+							"storage.max_download_compaction_concurrency=%d", sp.compactionConcurrency,
+						),
+					)
+					sp.skip = "used for ad hoc experiments"
+				}
+
 				if sp.skip == "" && !backuptestutils.IsOnlineRestoreSupported() {
 					sp.skip = "online restore is only tested on development branch"
 				}
@@ -215,7 +295,9 @@ func registerOnlineRestorePerf(r registry.Registry) {
 					rd := makeRestoreDriver(t, c, sp.restoreSpecs)
 					rd.prepareCluster(ctx)

-					restoreStats := runRestore(ctx, t, c, sp, rd, runOnline, runWorkload, useWorkarounds)
+					restoreStats := runRestore(
+						ctx, t, c, sp, rd, runOnline, runWorkload, clusterSettings...,
+					)
 					if runOnline {
 						require.NoError(t, postRestoreValidation(
 							ctx,
@@ -304,10 +386,7 @@ func registerOnlineRestoreCorrectness(r registry.Registry) {
 			rd := makeRestoreDriver(t, c, sp.restoreSpecs)
 			rd.prepareCluster(ctx)

-			runRestore(
-				ctx, t, c, regRestoreSpecs, rd,
-				false /* runOnline */, true /* runWorkload */, false, /* useWorkarounds */
-			)
+			runRestore(ctx, t, c, regRestoreSpecs, rd, false /* runOnline */, true /* runWorkload */)
 			details, err := c.RunWithDetails(
 				ctx,
 				t.L(),
@@ -320,10 +399,7 @@ func registerOnlineRestoreCorrectness(r registry.Registry) {
 			c.Wipe(ctx)
 			rd.prepareCluster(ctx)

-			runRestore(
-				ctx, t, c, orSpecs, rd,
-				true /* runOnline */, true /* runWorkload */, false, /* useWorkarounds */
-			)
+			runRestore(ctx, t, c, orSpecs, rd, true /* runOnline */, true /* runWorkload */)
 			details, err = c.RunWithDetails(
 				ctx,
 				t.L(),
@@ -577,13 +653,24 @@ type restoreStats struct {
 	workloadEndTime time.Time
 }

+// runRestore runs restore based on the provided specs.
+//
+// If runOnline is set, online restore is run, otherwise a conventional restore
+// is run.
+//
+// If runWorkload is set, the workload is run during the download phase of the
+// restore.
+//
+// clusterSettings is a list of key=value pairs of cluster settings to set
+// before performing the restore.
 func runRestore(
 	ctx context.Context,
 	t test.Test,
 	c cluster.Cluster,
 	sp onlineRestoreSpecs,
 	rd restoreDriver,
-	runOnline, runWorkload, useWorkarounds bool,
+	runOnline, runWorkload bool,
+	clusterSettings ...string,
 ) restoreStats {
 	testStartTime := timeutil.Now()

@@ -598,36 +685,9 @@ func runRestore(
 			return err
 		}
 		defer db.Close()
-		if useWorkarounds {
-			// TODO(dt): what's the right value for this? How do we tune this
-			// on the fly automatically during the restore instead of by-hand?
-			// Context: We expect many operations to take longer than usual
-			// when some or all of the data they touch is remote. For now this
-			// is being blanket set to 1h manually, and a user's run-book
-			// would need to do this by hand before an online restore and
-			// reset it manually after, but ideally the queues would be aware
-			// of remote-ness when they pick their own timeouts and pick
-			// accordingly.
-			if _, err := db.Exec("SET CLUSTER SETTING kv.queue.process.guaranteed_time_budget='1h'"); err != nil {
-				return err
-			}
-			// TODO(dt): AC appears periodically reduce the workload to 0 QPS
-			// during the download phase (sudden jumps from 0 to 2k qps to 0).
-			// Disable for now until we figure out how to smooth this out.
-			if _, err := db.Exec("SET CLUSTER SETTING admission.disk_bandwidth_tokens.elastic.enabled=false"); err != nil {
-				return err
-			}
-			if _, err := db.Exec("SET CLUSTER SETTING admission.kv.enabled=false"); err != nil {
-				return err
-			}
-			if _, err := db.Exec("SET CLUSTER SETTING admission.sql_kv_response.enabled=false"); err != nil {
-				return err
-			}
-			if _, err := db.Exec("SET CLUSTER SETTING kv.consistency_queue.enabled=false"); err != nil {
-				return err
-			}
-			if _, err := db.Exec("SET CLUSTER SETTING kv.range_merge.skip_external_bytes.enabled=true"); err != nil {
-				return err
+		for _, setting := range clusterSettings {
+			if _, err := db.Exec(fmt.Sprintf("SET CLUSTER SETTING %s", setting)); err != nil {
+				return errors.Wrapf(err, "failed to set cluster setting %s", setting)
 			}
 		}
 		opts := "WITH UNSAFE_RESTORE_INCOMPATIBLE_VERSION"
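With the signature change, the admission-control and queue workarounds are no longer hard-coded inside runRestore; callers pass key=value strings that the function applies via SET CLUSTER SETTING. A minimal sketch of the new call shape, reusing the variable names from the test body above, with settings copied from the clusterSettings slice in this diff:

settings := []string{
	"kv.queue.process.guaranteed_time_budget='1h'",
	"admission.kv.enabled=false",
	fmt.Sprintf("storage.max_download_compaction_concurrency=%d", 32),
}
stats := runRestore(ctx, t, c, sp, rd, true /* runOnline */, true /* runWorkload */, settings...)
_ = stats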

pkg/cmd/roachtest/tests/restore.go

Lines changed: 33 additions & 12 deletions
@@ -359,23 +359,33 @@ func registerRestore(r registry.Registry) {
 			timeout: 24 * time.Hour,
 			suites: registry.Suites(registry.Weekly),
 		},
-		// Following two tests are just used to benchmark classic restore against
-		// OR with the exact same fixtures and hardware.
+		// OR Benchmarking tests
+		// See benchmark plan here: https://docs.google.com/spreadsheets/d/1uPcQ1YPohXKxwFxWWDUMJrYLKQOuqSZKVrI8SJam5n8
 		{
-			hardware: makeHardwareSpecs(hardwareSpecs{}),
-			backup: backupSpecs{cloud: spec.GCE, fixture: SmallFixture},
-			timeout: 1 * time.Hour,
-			suites: registry.Suites(registry.Nightly),
+			hardware: makeHardwareSpecs(hardwareSpecs{
+				nodes: 10, volumeSize: 1500,
+			}),
+			backup: backupSpecs{
+				cloud: spec.GCE,
+				fixture: MediumFixture,
+			},
+			timeout: 3 * time.Hour,
 			fullBackupOnly: true,
-			skip: "used for adhoc benchmarking against OR",
+			suites: registry.Suites(registry.Nightly),
+			skip: "used for OR benchmarking purposes",
 		},
 		{
-			hardware: makeHardwareSpecs(hardwareSpecs{nodes: 10, volumeSize: 1500, workloadNode: true}),
-			backup: backupSpecs{cloud: spec.GCE, fixture: MediumFixture},
+			hardware: makeHardwareSpecs(hardwareSpecs{
+				nodes: 10, volumeSize: 1500, ebsIOPS: 15_000, ebsThroughput: 800,
+			}),
+			backup: backupSpecs{
+				cloud: spec.AWS,
+				fixture: MediumFixture,
+			},
 			timeout: 3 * time.Hour,
-			suites: registry.Suites(registry.Nightly),
 			fullBackupOnly: true,
-			skip: "used for adhoc benchmarking against OR",
+			suites: registry.Suites(registry.Nightly),
+			skip: "used for OR benchmarking purposes",
 		},
 		// TODO(msbutler): add the following tests once roachperf/grafana is hooked up and old tests are
 		// removed:
@@ -483,9 +493,14 @@ type hardwareSpecs struct {
 	useLocalSSD bool

 	// ebsThroughput is the min provisioned throughput of the EBS volume, in MB/s.
-	// TODO(pavelkalinnikov): support provisioning throughput not only on EBS.
+	// Ignored if not running on AWS. Defaults to 125 MiB/s for the default gp3
+	// volume.
 	ebsThroughput int

+	// ebsIOPS is the configured IOPS for the EBS volume. Ignored if not running
+	// on AWS. Defaults to 3000 IOPS for the default gp3 volume.
+	ebsIOPS int
+
 	// mem is the memory per cpu.
 	mem spec.MemPerCPU

@@ -503,6 +518,9 @@ func (hw hardwareSpecs) makeClusterSpecs(r registry.Registry) spec.ClusterSpec {
 	if hw.ebsThroughput != 0 {
 		clusterOpts = append(clusterOpts, spec.AWSVolumeThroughput(hw.ebsThroughput))
 	}
+	if hw.ebsIOPS != 0 {
+		clusterOpts = append(clusterOpts, spec.AWSVolumeIOPS(hw.ebsIOPS))
+	}

 	if hw.useLocalSSD {
 		clusterOpts = append(clusterOpts, spec.PreferLocalSSD())
@@ -600,6 +618,9 @@ func makeHardwareSpecs(override hardwareSpecs) hardwareSpecs {
 	if override.ebsThroughput != 0 {
 		specs.ebsThroughput = override.ebsThroughput
 	}
+	if override.ebsIOPS != 0 {
+		specs.ebsIOPS = override.ebsIOPS
+	}
 	if specs.useLocalSSD {
 		specs.volumeSize = 0
 		specs.ebsThroughput = 0
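The new ebsIOPS knob follows the same override pattern as ebsThroughput: set on hardwareSpecs, merged by makeHardwareSpecs, and turned into a cluster option in makeClusterSpecs. A minimal sketch mirroring the AWS benchmark entry above, with the surrounding test registration omitted and values copied from that entry:

hw := makeHardwareSpecs(hardwareSpecs{
	nodes: 10, volumeSize: 1500, ebsIOPS: 15_000, ebsThroughput: 800,
})
// makeClusterSpecs will then append spec.AWSVolumeThroughput(800) and
// spec.AWSVolumeIOPS(15_000) when building the roachprod cluster spec.
_ = hw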
