Skip to content

Commit 230ab29

Browse files
committed
roachtest: support boot disk only workload VMs
Previously, workload machines automatically inherited the same disk setup as the rest of the cluster. To optimize for cost, workload machines can now be created with only a boot disk. Because the providers attach a standard data disk by default, a new explicit flag, "BootDiskOnly", has been added to the provider options to suppress that default specifically for workload machines. Tests whose workload nodes need the cluster's disk configuration can opt back in with the new "WorkloadRequiresDisk" cluster spec option. Epic: None Release note: None
1 parent 5abfc90 commit 230ab29

File tree

18 files changed

+164
-76
lines changed

18 files changed

+164
-76
lines changed

pkg/cmd/roachtest/spec/cluster_spec.go

Lines changed: 29 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,10 @@ type ClusterSpec struct {
104104
// treated as workload node. Defaults to a VM with 4 CPUs if not specified
105105
// by WorkloadNodeCPUs.
106106
// TODO(GouravKumar): remove use of WorkloadNode, use WorkloadNodeCount instead
107-
WorkloadNode bool
108-
WorkloadNodeCount int
109-
WorkloadNodeCPUs int
107+
WorkloadNode bool
108+
WorkloadNodeCount int
109+
WorkloadNodeCPUs int
110+
WorkloadRequiresDisk bool
110111
// CPUs is the number of CPUs per node.
111112
CPUs int
112113
Mem MemPerCPU
@@ -226,7 +227,12 @@ func awsMachineSupportsSSD(machineType string) bool {
226227
}
227228

228229
func getAWSOpts(
229-
machineType string, volumeSize, ebsThroughput int, ebsIOPS int, localSSD bool, useSpotVMs bool,
230+
machineType string,
231+
volumeSize, ebsThroughput int,
232+
ebsIOPS int,
233+
localSSD bool,
234+
useSpotVMs bool,
235+
bootDiskOnly bool,
230236
) vm.ProviderOpts {
231237
opts := aws.DefaultProviderOpts()
232238
if volumeSize != 0 {
@@ -244,6 +250,7 @@ func getAWSOpts(
244250
opts.MachineType = machineType
245251
}
246252
opts.UseSpot = useSpotVMs
253+
opts.BootDiskOnly = bootDiskOnly
247254
return opts
248255
}
249256

@@ -258,6 +265,7 @@ func getGCEOpts(
258265
volumeType string,
259266
volumeCount int,
260267
useSpot bool,
268+
bootDiskOnly bool,
261269
) vm.ProviderOpts {
262270
opts := gce.DefaultProviderOpts()
263271
opts.MachineType = machineType
@@ -286,16 +294,17 @@ func getGCEOpts(
286294
if volumeType != "" {
287295
opts.PDVolumeType = volumeType
288296
}
289-
297+
opts.BootDiskOnly = bootDiskOnly
290298
return opts
291299
}
292300

293-
func getAzureOpts(machineType string, volumeSize int) vm.ProviderOpts {
301+
func getAzureOpts(machineType string, volumeSize int, bootDiskOnly bool) vm.ProviderOpts {
294302
opts := azure.DefaultProviderOpts()
295303
opts.MachineType = machineType
296304
if volumeSize != 0 {
297305
opts.NetworkDiskSize = int32(volumeSize)
298306
}
307+
opts.BootDiskOnly = bootDiskOnly
299308
return opts
300309
}
301310

@@ -307,6 +316,7 @@ func getIBMOpts(
307316
volumeIOPS int,
308317
extraVolumeCount int,
309318
RAID0 bool,
319+
bootDiskOnly bool,
310320
) vm.ProviderOpts {
311321
opts := ibm.DefaultProviderOpts()
312322
opts.MachineType = machineType
@@ -334,6 +344,7 @@ func getIBMOpts(
334344
}
335345
opts.UseMultipleDisks = !RAID0
336346
}
347+
opts.BootDiskOnly = bootDiskOnly
337348

338349
return opts
339350
}
@@ -510,7 +521,7 @@ func (s *ClusterSpec) RoachprodOpts(
510521
var err error
511522
switch cloud {
512523
case AWS:
513-
workloadMachineType, _, err = SelectAWSMachineType(s.WorkloadNodeCPUs, s.Mem, preferLocalSSD && s.VolumeSize == 0, selectedArch)
524+
workloadMachineType, _, err = SelectAWSMachineType(s.WorkloadNodeCPUs, s.Mem, false, selectedArch)
514525
case GCE:
515526
workloadMachineType, _ = SelectGCEMachineType(s.WorkloadNodeCPUs, s.Mem, selectedArch)
516527
case Azure:
@@ -532,27 +543,29 @@ func (s *ClusterSpec) RoachprodOpts(
532543
switch cloud {
533544
case AWS:
534545
providerOpts = getAWSOpts(machineType, s.VolumeSize, s.AWS.VolumeThroughput, s.AWS.VolumeIOPS,
535-
createVMOpts.SSDOpts.UseLocalSSD, s.UseSpotVMs)
536-
workloadProviderOpts = getAWSOpts(workloadMachineType, s.VolumeSize, s.AWS.VolumeThroughput,
537-
s.AWS.VolumeIOPS, createVMOpts.SSDOpts.UseLocalSSD, s.UseSpotVMs)
546+
createVMOpts.SSDOpts.UseLocalSSD, s.UseSpotVMs, false)
547+
workloadProviderOpts = getAWSOpts(workloadMachineType, s.VolumeSize, s.AWS.VolumeThroughput, s.AWS.VolumeIOPS,
548+
createVMOpts.SSDOpts.UseLocalSSD, s.UseSpotVMs, !s.WorkloadRequiresDisk)
538549
case GCE:
539550
providerOpts = getGCEOpts(machineType, s.VolumeSize, ssdCount,
540551
createVMOpts.SSDOpts.UseLocalSSD, s.RAID0, s.TerminateOnMigration,
541-
s.GCE.MinCPUPlatform, vm.ParseArch(createVMOpts.Arch), s.GCE.VolumeType, s.GCE.VolumeCount, s.UseSpotVMs,
552+
s.GCE.MinCPUPlatform, vm.ParseArch(createVMOpts.Arch), s.GCE.VolumeType,
553+
s.GCE.VolumeCount, s.UseSpotVMs, false,
542554
)
543555
workloadProviderOpts = getGCEOpts(workloadMachineType, s.VolumeSize, ssdCount,
544556
createVMOpts.SSDOpts.UseLocalSSD, s.RAID0, s.TerminateOnMigration,
545-
s.GCE.MinCPUPlatform, vm.ParseArch(createVMOpts.Arch), s.GCE.VolumeType, s.GCE.VolumeCount, s.UseSpotVMs,
557+
s.GCE.MinCPUPlatform, vm.ParseArch(createVMOpts.Arch), s.GCE.VolumeType,
558+
s.GCE.VolumeCount, s.UseSpotVMs, !s.WorkloadRequiresDisk,
546559
)
547560
case Azure:
548-
providerOpts = getAzureOpts(machineType, s.VolumeSize)
549-
workloadProviderOpts = getAzureOpts(workloadMachineType, s.VolumeSize)
561+
providerOpts = getAzureOpts(machineType, s.VolumeSize, false)
562+
workloadProviderOpts = getAzureOpts(workloadMachineType, s.VolumeSize, true)
550563
case IBM:
551564
providerOpts = getIBMOpts(machineType, s.TerminateOnMigration, s.VolumeSize,
552-
s.IBM.VolumeType, s.IBM.VolumeIOPS, s.IBM.VolumeCount, s.RAID0,
565+
s.IBM.VolumeType, s.IBM.VolumeIOPS, s.IBM.VolumeCount, s.RAID0, false,
553566
)
554567
workloadProviderOpts = getIBMOpts(workloadMachineType, s.TerminateOnMigration, s.VolumeSize,
555-
s.IBM.VolumeType, s.IBM.VolumeIOPS, s.IBM.VolumeCount, s.RAID0,
568+
s.IBM.VolumeType, s.IBM.VolumeIOPS, s.IBM.VolumeCount, s.RAID0, true,
556569
)
557570
}
558571

pkg/cmd/roachtest/spec/option.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,15 @@ func WorkloadNodeCPU(n int) Option {
5757
}
5858
}
5959

60+
// WorkloadRequiresDisk should be used if the workload nodes should have the
61+
// exact same disk configuration as the rest of the cluster. Otherwise, all
62+
// workload nodes only have a boot disk.
63+
func WorkloadRequiresDisk() Option {
64+
return func(spec *ClusterSpec) {
65+
spec.WorkloadRequiresDisk = true
66+
}
67+
}
68+
6069
// Mem requests nodes with low/standard/high ratio of memory per CPU.
6170
func Mem(level MemPerCPU) Option {
6271
return func(spec *ClusterSpec) {

pkg/roachprod/vm/aws/aws.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,9 @@ type ProviderOpts struct {
279279
// use spot vms, spot vms are significantly cheaper, but can be preempted by AWS.
280280
// see https://aws.amazon.com/ec2/spot/ for more details.
281281
UseSpot bool
282+
// BootDiskOnly ensures that no additional disks will be attached, other than
283+
// the boot disk.
284+
BootDiskOnly bool
282285
}
283286

284287
// Provider implements the vm.Provider interface for AWS.
@@ -1348,13 +1351,15 @@ func (p *Provider) runInstance(
13481351
extraMountOpts = "nobarrier"
13491352
}
13501353
}
1354+
13511355
filename, err := writeStartupScript(
13521356
name,
13531357
extraMountOpts,
13541358
opts.SSDOpts.FileSystem,
13551359
providerOpts.UseMultipleDisks,
13561360
opts.Arch == string(vm.ArchFIPS),
13571361
providerOpts.RemoteUserName,
1362+
providerOpts.BootDiskOnly,
13581363
)
13591364
if err != nil {
13601365
return nil, errors.Wrapf(err, "could not write AWS startup script to temp file")
@@ -1605,7 +1610,7 @@ func assignEBSVolumes(opts *vm.CreateOpts, providerOpts *ProviderOpts) ebsVolume
16051610
// Make a local copy of providerOpts.EBSVolumes to prevent data races
16061611
ebsVolumes := providerOpts.EBSVolumes
16071612
// The local NVMe devices are automatically mapped. Otherwise, we need to map an EBS data volume.
1608-
if !opts.SSDOpts.UseLocalSSD {
1613+
if !opts.SSDOpts.UseLocalSSD && !providerOpts.BootDiskOnly {
16091614
if len(ebsVolumes) == 0 && providerOpts.DefaultEBSVolume.Disk.VolumeType == "" {
16101615
providerOpts.DefaultEBSVolume.Disk.VolumeType = defaultEBSVolumeType
16111616
providerOpts.DefaultEBSVolume.Disk.DeleteOnTermination = true

pkg/roachprod/vm/aws/support.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ const awsStartupScriptTemplate = `#!/usr/bin/env bash
3030
# Script for setting up a AWS machine for roachprod use.
3131
3232
function setup_disks() {
33+
{{ if .BootDiskOnly }}
34+
echo "VM has no disk attached other than the boot disk."
35+
return 0
36+
{{ end }}
37+
3338
{{ if not .Zfs }}
3439
mount_opts="defaults,nofail"
3540
{{if .ExtraMountOpts}}mount_opts="${mount_opts},{{.ExtraMountOpts}}"{{end}}
@@ -181,11 +186,13 @@ func writeStartupScript(
181186
useMultiple bool,
182187
enableFips bool,
183188
remoteUser string,
189+
bootDiskOnly bool,
184190
) (string, error) {
185191
type tmplParams struct {
186192
vm.StartupArgs
187193
ExtraMountOpts string
188194
UseMultipleDisks bool
195+
BootDiskOnly bool
189196
}
190197

191198
args := tmplParams{
@@ -198,6 +205,7 @@ func writeStartupScript(
198205
),
199206
ExtraMountOpts: extraMountOpts,
200207
UseMultipleDisks: useMultiple,
208+
BootDiskOnly: bootDiskOnly,
201209
}
202210

203211
tmpfile, err := os.CreateTemp("", "aws-startup-script")

pkg/roachprod/vm/aws/support_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ import (
1717

1818
// TestWriteStartupScriptTemplate mainly tests the startup script tpl compiles.
1919
func TestWriteStartupScriptTemplate(t *testing.T) {
20-
file, err := writeStartupScript("vm_name", "", vm.Zfs, false, false, "ubuntu")
20+
file, err := writeStartupScript("vm_name", "", vm.Zfs, false,
21+
false, "ubuntu", false)
2122
require.NoError(t, err)
2223

2324
f, err := os.ReadFile(file)

pkg/roachprod/vm/aws/testdata/startup_script

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ echo
77
function setup_disks() {
88

99

10+
11+
1012
use_multiple_disks=''
1113

1214
mount_prefix="/mnt/data"

pkg/roachprod/vm/azure/azure.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -931,6 +931,9 @@ func (p *Provider) createVM(
931931
startupArgs.AttachedDiskLun = &lun
932932
}
933933

934+
// Check if we only require a boot disk (workload only machines).
935+
startupArgs.BootDiskOnly = providerOpts.BootDiskOnly
936+
934937
startupScript, err := evalStartupTemplate(startupArgs)
935938
if err != nil {
936939
return machine, err
@@ -1058,7 +1061,7 @@ func (p *Provider) createVM(
10581061
machine.VirtualMachineProperties.StorageProfile.DiskControllerType = compute.NVMe
10591062
}
10601063

1061-
if !opts.SSDOpts.UseLocalSSD {
1064+
if !opts.SSDOpts.UseLocalSSD && !providerOpts.BootDiskOnly {
10621065
caching := compute.CachingTypesNone
10631066

10641067
switch providerOpts.DiskCaching {

pkg/roachprod/vm/azure/flags.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ type ProviderOpts struct {
2626
NetworkDiskSize int32
2727
UltraDiskIOPS int64
2828
DiskCaching string
29+
BootDiskOnly bool
2930
}
3031

3132
// These default locations support availability zones. At the time of

pkg/roachprod/vm/azure/testdata/startup_script

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@ echo
66
# Script for setting up a Azure machine for roachprod use.
77

88
function setup_disks() {
9-
mount_opts="defaults"
9+
10+
11+
mount_opts="defaults"
1012

1113
devices=()
1214

pkg/roachprod/vm/azure/utils.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,20 @@ type azureStartupArgs struct {
2323
vm.StartupArgs
2424
AttachedDiskLun *int // Use attached disk, with specified LUN; Use local ssd if nil.
2525
DiskControllerNVMe bool // Interface data disk via NVMe
26+
BootDiskOnly bool
2627
}
2728

2829
const azureStartupTemplate = `#!/bin/bash
2930
3031
# Script for setting up a Azure machine for roachprod use.
3132
3233
function setup_disks() {
33-
mount_opts="defaults"
34+
{{ if .BootDiskOnly }}
35+
echo "VM has no disk attached other than the boot disk."
36+
return 0
37+
{{ end }}
38+
39+
mount_opts="defaults"
3440
3541
devices=()
3642
{{if .DiskControllerNVMe}}

0 commit comments

Comments
 (0)