Skip to content

Commit e6dbabb

Browse files
committed
roachtest: provision 250 MB/s for 8TB restore test
The `restore/tpce/8TB/aws/nodes=10/cpus=8` test saturates the default 125 MB/s EBS volume throughput. This commit provisions 250 MB/s of throughput so that the test no longer runs at the edge of overload.

Epic: none
Release note: none
1 parent ab13de5 commit e6dbabb

File tree

3 files changed

+29
-3
lines changed

3 files changed

+29
-3
lines changed

pkg/cmd/roachtest/spec/cluster_spec.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,10 @@ type ClusterSpec struct {
9393
// generality. Make it easier to just inject cloud-specific arguments.
9494
GCEMinCPUPlatform string
9595
GCEVolumeType string
96+
// AWS-specific arguments.
97+
//
98+
// AWSVolumeThroughput is the min provisioned EBS volume throughput, in MB/s. If zero, the provider default is kept.
99+
AWSVolumeThroughput int
96100
}
97101

98102
// MakeClusterSpec makes a ClusterSpec.
@@ -140,11 +144,16 @@ func awsMachineSupportsSSD(machineType string) bool {
140144
return false
141145
}
142146

143-
func getAWSOpts(machineType string, zones []string, volumeSize int, localSSD bool) vm.ProviderOpts {
147+
func getAWSOpts(
148+
machineType string, zones []string, volumeSize, ebsThroughput int, localSSD bool,
149+
) vm.ProviderOpts {
144150
opts := aws.DefaultProviderOpts()
145151
if volumeSize != 0 {
146152
opts.DefaultEBSVolume.Disk.VolumeSize = volumeSize
147153
}
154+
if ebsThroughput != 0 {
155+
opts.DefaultEBSVolume.Disk.Throughput = ebsThroughput
156+
}
148157
if localSSD {
149158
opts.SSDMachineType = machineType
150159
} else {
@@ -310,7 +319,8 @@ func (s *ClusterSpec) RoachprodOpts(
310319
var providerOpts vm.ProviderOpts
311320
switch s.Cloud {
312321
case AWS:
313-
providerOpts = getAWSOpts(machineType, zones, s.VolumeSize, createVMOpts.SSDOpts.UseLocalSSD)
322+
providerOpts = getAWSOpts(machineType, zones, s.VolumeSize, s.AWSVolumeThroughput,
323+
createVMOpts.SSDOpts.UseLocalSSD)
314324
case GCE:
315325
providerOpts = getGCEOpts(machineType, zones, s.VolumeSize, ssdCount,
316326
createVMOpts.SSDOpts.UseLocalSSD, s.RAID0, s.TerminateOnMigration,

pkg/cmd/roachtest/tests/restore.go

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,10 @@ func registerRestore(r registry.Registry) {
323323
},
324324
{
325325
// The nightly 8TB Restore test.
326-
hardware: makeHardwareSpecs(hardwareSpecs{nodes: 10, volumeSize: 2000}),
326+
// NB: bump disk throughput because this load saturates the default 125
327+
// MB/s. See https://github.com/cockroachdb/cockroach/issues/107609.
328+
hardware: makeHardwareSpecs(hardwareSpecs{nodes: 10, volumeSize: 2000,
329+
ebsThroughput: 250 /* MB/s */}),
327330
backup: makeRestoringBackupSpecs(backupSpecs{
328331
version: "v22.2.1",
329332
workload: tpceRestore{customers: 500000}}),
@@ -466,6 +469,9 @@ type hardwareSpecs struct {
466469
// volumeSize indicates the size of per-node block storage (pd-ssd for gce,
467470
// ebs for aws). If zero, local SSDs are used.
468471
volumeSize int
472+
// ebsThroughput is the min provisioned throughput of the EBS volume, in MB/s.
473+
// TODO(pavelkalinnikov): support provisioning throughput not only on EBS.
474+
ebsThroughput int
469475

470476
// mem is the memory per cpu.
471477
mem spec.MemPerCPU
@@ -494,6 +500,10 @@ func (hw hardwareSpecs) makeClusterSpecs(r registry.Registry, backupCloud string
494500
}
495501
s := r.MakeClusterSpec(hw.nodes+addWorkloadNode, clusterOpts...)
496502

503+
if hw.ebsThroughput != 0 {
504+
s.AWSVolumeThroughput = hw.ebsThroughput
505+
}
506+
497507
if backupCloud == spec.AWS && s.Cloud == spec.AWS && s.VolumeSize != 0 {
498508
// Work around an issue that RAID0s local NVMe and GP3 storage together:
499509
// https://github.com/cockroachdb/cockroach/issues/98783.
@@ -554,6 +564,9 @@ func makeHardwareSpecs(override hardwareSpecs) hardwareSpecs {
554564
if override.volumeSize != 0 {
555565
specs.volumeSize = override.volumeSize
556566
}
567+
if override.ebsThroughput != 0 {
568+
specs.ebsThroughput = override.ebsThroughput
569+
}
557570
specs.zones = override.zones
558571
specs.workloadNode = override.workloadNode
559572
return specs

pkg/roachprod/vm/gce/gcloud.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1009,6 +1009,9 @@ func (p *Provider) Create(
10091009
fmt.Sprintf("size=%dGB", providerOpts.PDVolumeSize),
10101010
"auto-delete=yes",
10111011
}
1012+
// TODO(pavelkalinnikov): support disk types with "provisioned-throughput"
1013+
// option, such as Hyperdisk Throughput:
1014+
// https://cloud.google.com/compute/docs/disks/add-hyperdisk#hyperdisk-throughput.
10121015
args = append(args, "--create-disk", strings.Join(pdProps, ","))
10131016
// Enable DISCARD commands for persistent disks, as is advised in:
10141017
// https://cloud.google.com/compute/docs/disks/optimizing-pd-performance#formatting_parameters.

0 commit comments

Comments
 (0)