Skip to content

Commit 1f12770

Browse files
authored
Add standby instance support for Vetu (#719)
1 parent 75e9c08 commit 1f12770

File tree

13 files changed

+289
-61
lines changed

13 files changed

+289
-61
lines changed

.cirrus.yml

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,36 @@ task:
5757
test_script:
5858
- go test -v -p 1 ./...
5959

60+
task:
61+
name: Test (Linux with Vetu)
62+
alias: Tests
63+
aws_credentials:
64+
role_arn: arn:aws:iam::944424729675:role/VetuIntegrationTests
65+
role_session_name: cirrus
66+
region: us-east-2
67+
ec2_instance:
68+
image: ubuntu/images/hvm-ssd/ubuntu-jammy-22.04-arm64-server-*
69+
architecture: arm64
70+
region: us-east-2
71+
type: a1.metal
72+
block_device_mappings:
73+
- device_name: /dev/sda1
74+
ebs:
75+
volume_size: 100
76+
install_vetu_script:
77+
- sudo apt-get update && sudo apt-get -y install apt-transport-https ca-certificates
78+
- echo "deb [trusted=yes] https://apt.fury.io/cirruslabs/ /" | sudo tee /etc/apt/sources.list.d/cirruslabs.list
79+
- sudo apt-get update && sudo apt-get -y install vetu
80+
test_script:
81+
- wget --no-verbose -O - https://go.dev/dl/go1.22.2.linux-arm64.tar.gz | tar -C /usr/local -xz
82+
- export PATH=$PATH:/usr/local/go/bin
83+
- go test -v -p 1 ./...
84+
env:
85+
CIRRUS_INTERNAL_VETU_VM: ghcr.io/cirruslabs/ubuntu-runner-arm64:latest
86+
CIRRUS_INTERNAL_VETU_SSH_PASSWORD: admin
87+
CIRRUS_INTERNAL_VETU_SSH_USER: admin
88+
HOME: /root
89+
6090
task:
6191
name: Release (Dry Run)
6292
only_if: $CIRRUS_TAG == ''

PERSISTENT-WORKERS.md

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -306,4 +306,22 @@ persistent_worker:
306306
memory: 12
307307
```
308308

309-
Currently only Tart isolation is supported.
309+
Vetu configuration on `arm64` is similar:
310+
311+
```yaml
312+
standby:
313+
isolation:
314+
vetu:
315+
image: ghcr.io/cirruslabs/ubuntu-runner-arm64:latest
316+
user: admin
317+
password: admin
318+
cpu: 16
319+
memory: 48
320+
networking:
321+
host: {}
322+
disk_size: 100
323+
```
324+
325+
On `amd64`, simply replace the `image` with `ghcr.io/cirruslabs/ubuntu-runner-amd64:latest`.
326+
327+
Currently only Tart and Vetu isolations are supported for standby.

internal/commands/run.go

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -67,13 +67,11 @@ var vetuLazyPull bool
6767
// Flags useful for debugging.
6868
var debugNoCleanup bool
6969

70-
var baseEnvironment map[string]string = eenvironment.Merge(
71-
eenvironment.Static(),
72-
eenvironment.BuildID(),
73-
eenvironment.ProjectSpecific(projectDir),
74-
)
75-
76-
func readYaml(ctx context.Context, userSpecifiedEnvironment map[string]string) (*parser.Result, error) {
70+
func readYaml(
71+
ctx context.Context,
72+
baseEnvironment map[string]string,
73+
userSpecifiedEnvironment map[string]string,
74+
) (*parser.Result, error) {
7775
// Retrieve the combined YAML configuration
7876
combinedYAML, err := helpers.ReadCombinedConfig(
7977
ctx,
@@ -119,6 +117,8 @@ func readYaml(ctx context.Context, userSpecifiedEnvironment map[string]string) (
119117
}
120118

121119
func run(cmd *cobra.Command, args []string) error {
120+
baseEnvironment := makeBaseEnvironment()
121+
122122
userSpecifiedEnvironment, err := makeUserSpecifiedEnvironment()
123123
if err != nil {
124124
return fmt.Errorf("%v: %v", ErrRun, err)
@@ -127,7 +127,7 @@ func run(cmd *cobra.Command, args []string) error {
127127
// https://github.com/spf13/cobra/issues/340#issuecomment-374617413
128128
cmd.SilenceUsage = true
129129

130-
result, err := readYaml(cmd.Context(), userSpecifiedEnvironment)
130+
result, err := readYaml(cmd.Context(), baseEnvironment, userSpecifiedEnvironment)
131131
if err != nil {
132132
return err
133133
}
@@ -212,12 +212,14 @@ func newRunCmd() *cobra.Command {
212212
ValidArgsFunction: func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
213213
completions := []string{}
214214

215+
baseEnvironment := makeBaseEnvironment()
216+
215217
userSpecifiedEnvironment, err := makeUserSpecifiedEnvironment()
216218
if err != nil {
217219
return completions, cobra.ShellCompDirectiveError
218220
}
219221

220-
result, err := readYaml(cmd.Context(), userSpecifiedEnvironment)
222+
result, err := readYaml(cmd.Context(), baseEnvironment, userSpecifiedEnvironment)
221223
if err != nil {
222224
return completions, cobra.ShellCompDirectiveError
223225
}
@@ -297,6 +299,14 @@ func newRunCmd() *cobra.Command {
297299
return cmd
298300
}
299301

302+
func makeBaseEnvironment() map[string]string {
303+
return eenvironment.Merge(
304+
eenvironment.Static(),
305+
eenvironment.BuildID(),
306+
eenvironment.ProjectSpecific(projectDir),
307+
)
308+
}
309+
300310
func makeUserSpecifiedEnvironment() (map[string]string, error) {
301311
var result map[string]string
302312
var err error

internal/commands/run_test.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,10 @@ func TestRunYAMLAndStarlarkHooks(t *testing.T) {
286286

287287
// TestRunContainerPull ensures that container images are pulled by default.
288288
func TestRunContainerPull(t *testing.T) {
289+
if _, ok := os.LookupEnv("CIRRUS_CONTAINER_BACKEND"); !ok {
290+
t.Skip("no container backend configured")
291+
}
292+
289293
backend, err := containerbackend.New(containerbackend.BackendTypeAuto)
290294
if err != nil {
291295
t.Fatal(err)
@@ -432,6 +436,10 @@ func TestRunPrebuiltImageTemplate(t *testing.T) {
432436
}
433437

434438
func TestAffectedFiles(t *testing.T) {
439+
if _, ok := os.LookupEnv("CIRRUS_CONTAINER_BACKEND"); !ok {
440+
t.Skip("no container backend configured")
441+
}
442+
435443
testutil.TempChdirPopulatedWith(t, "testdata/run-affected-files")
436444

437445
// Create os.Stderr writer that duplicates its output to buf
@@ -464,6 +472,10 @@ func TestHasStaticEnvironment(t *testing.T) {
464472
}
465473

466474
func TestRunGitHubAnnotations(t *testing.T) {
475+
if _, ok := os.LookupEnv("CIRRUS_CONTAINER_BACKEND"); !ok {
476+
t.Skip("no container backend configured")
477+
}
478+
467479
testutil.TempChdirPopulatedWith(t, "testdata/run-github-annotations")
468480

469481
t.Setenv("GITHUB_ACTIONS", "true")
@@ -492,6 +504,10 @@ func TestRunGitHubAnnotations(t *testing.T) {
492504
}
493505

494506
func TestRunArtifactsDir(t *testing.T) {
507+
if _, ok := os.LookupEnv("CIRRUS_CONTAINER_BACKEND"); !ok {
508+
t.Skip("no container backend configured")
509+
}
510+
495511
testutil.TempChdirPopulatedWith(t, "testdata/run-artifacts-dir")
496512

497513
// Create os.Stderr writer that duplicates its output to buf

internal/executor/instance/abstract/abstract.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,13 @@ import (
88
)
99

1010
type Instance interface {
11-
Run(context.Context, *runconfig.RunConfig) error
11+
Run(ctx context.Context, config *runconfig.RunConfig) error
1212
WorkingDirectory(projectDir string, dirtyMode bool) string
1313
Close(ctx context.Context) error
1414
Attributes() []attribute.KeyValue
1515
}
1616

1717
type WarmableInstance interface {
1818
// Warmup can optionally be called when a persistent worker is configured to be warm
19-
Warmup(context.Context, string, map[string]string, *echelon.Logger) error
19+
Warmup(ctx context.Context, ident string, env map[string]string, logger *echelon.Logger) error
2020
}

internal/executor/instance/persistentworker/isolation/vetu/vetu.go

Lines changed: 98 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,14 @@ import (
99
"github.com/cirruslabs/cirrus-cli/internal/executor/instance/runconfig"
1010
"github.com/cirruslabs/cirrus-cli/internal/executor/platform"
1111
"github.com/cirruslabs/cirrus-cli/internal/logger"
12+
"github.com/cirruslabs/echelon"
1213
"github.com/getsentry/sentry-go"
1314
"github.com/google/uuid"
1415
"go.opentelemetry.io/otel"
1516
"go.opentelemetry.io/otel/attribute"
1617
"golang.org/x/crypto/ssh"
1718
"runtime"
19+
"strconv"
1820
"strings"
1921
"time"
2022
)
@@ -39,6 +41,8 @@ type Vetu struct {
3941
diskSize uint32
4042
bridgedInterface string
4143
hostNetworking bool
44+
45+
vm *VM
4246
}
4347

4448
func New(
@@ -83,72 +87,93 @@ func (vetu *Vetu) Attributes() []attribute.KeyValue {
8387
}
8488
}
8589

86-
func (vetu *Vetu) Run(ctx context.Context, config *runconfig.RunConfig) error {
90+
func (vetu *Vetu) Warmup(
91+
ctx context.Context,
92+
ident string,
93+
env map[string]string,
94+
logger *echelon.Logger,
95+
) error {
96+
return vetu.bootVM(ctx, ident, env, false, logger)
97+
}
98+
99+
func (vetu *Vetu) bootVM(
100+
ctx context.Context,
101+
ident string,
102+
env map[string]string,
103+
lazyPull bool,
104+
logger *echelon.Logger,
105+
) error {
87106
ctx, prepareInstanceSpan := tracer.Start(ctx, "prepare-instance")
88107
defer prepareInstanceSpan.End()
89108

90-
tmpVMName := fmt.Sprintf("%s%d-", vmNamePrefix, config.TaskID) + uuid.NewString()
91-
vm, err := NewVMClonedFrom(ctx,
92-
vetu.vmName, tmpVMName,
93-
config.VetuOptions.LazyPull,
94-
config.AdditionalEnvironment,
95-
config.Logger(),
96-
)
109+
var identToBeInjected string
110+
if ident != "" {
111+
identToBeInjected = fmt.Sprintf("%s-", ident)
112+
}
113+
114+
tmpVMName := vmNamePrefix + identToBeInjected + uuid.NewString()
115+
116+
vm, err := NewVMClonedFrom(ctx, vetu.vmName, tmpVMName, lazyPull, env, logger)
97117
if err != nil {
98118
return fmt.Errorf("%w: failed to create VM cloned from %q: %v", ErrFailed, vetu.vmName, err)
99119
}
100-
defer func() {
101-
if localHub := sentry.GetHubFromContext(ctx); localHub != nil {
102-
localHub.AddBreadcrumb(&sentry.Breadcrumb{
103-
Message: fmt.Sprintf("stopping and deleting the VM %s", vm.ident),
104-
}, nil)
105-
}
106120

107-
_ = vm.Close()
108-
}()
121+
vetu.vm = vm
109122

110-
if err := vm.Configure(ctx, vetu.cpu, vetu.memory, vetu.diskSize, config.Logger()); err != nil {
123+
if err := vm.Configure(ctx, vetu.cpu, vetu.memory, vetu.diskSize, logger); err != nil {
111124
return fmt.Errorf("%w: failed to configure VM %q: %v", ErrFailed, vm.Ident(), err)
112125
}
113126

114127
// Start the VM (asynchronously)
115128
vm.Start(ctx, vetu.bridgedInterface, vetu.hostNetworking)
116129

117-
// Wait for the VM to start and get its IP address
118-
bootLogger := config.Logger().Scoped("boot virtual machine")
130+
// Wait for the VM to start and get its DHCP address
131+
bootLogger := logger.Scoped("boot virtual machine")
119132

120-
var ip string
133+
ipCtx, ipCtxCancel := context.WithTimeoutCause(ctx, 10*time.Minute,
134+
fmt.Errorf("timed out while trying to retrieve the VM %s IP", vm.Ident()))
135+
defer ipCtxCancel()
121136

122-
for {
123-
select {
124-
case <-ctx.Done():
125-
return ctx.Err()
126-
case err := <-vm.ErrChan():
127-
return err
128-
default:
129-
time.Sleep(time.Second)
130-
}
137+
ip, err := vetu.retrieveIPLoop(ipCtx, vm)
138+
if err != nil {
139+
return err
140+
}
131141

132-
ip, err = vm.RetrieveIP(ctx)
133-
if err != nil {
134-
vetu.logger.Debugf("failed to retrieve VM %s IP: %v\n", vm.Ident(), err)
135-
continue
136-
}
142+
bootLogger.Errorf("VM was assigned with %s IP", ip)
137143

138-
break
144+
sshClient, err := remoteagent.WaitForSSH(ipCtx, fmt.Sprintf("%s:%d", ip, vetu.sshPort), vetu.sshUser,
145+
vetu.sshPassword, logger)
146+
if err != nil {
147+
return err
139148
}
149+
_ = sshClient.Close()
140150

141-
vetu.logger.Debugf("IP %s retrieved from VM %s, running agent...", ip, vm.Ident())
142-
143-
bootLogger.Errorf("VM was assigned with %s IP", ip)
144151
bootLogger.Finish(true)
145152

146-
prepareInstanceSpan.End()
153+
return nil
154+
}
155+
156+
func (vetu *Vetu) Run(ctx context.Context, config *runconfig.RunConfig) error {
157+
if vetu.vm == nil {
158+
err := vetu.bootVM(ctx, strconv.FormatInt(config.TaskID, 10), config.AdditionalEnvironment,
159+
config.VetuOptions.LazyPull, config.Logger())
160+
if err != nil {
161+
return err
162+
}
163+
}
164+
165+
ip, err := vetu.vm.RetrieveIP(ctx)
166+
if err != nil {
167+
return err
168+
}
169+
170+
// Wait for the VM to start and get its IP address
171+
vetu.logger.Debugf("IP %s retrieved from VM %s, running agent...", ip, vetu.vm.Ident())
147172

148173
err = remoteagent.WaitForAgent(ctx, vetu.logger, fmt.Sprintf("%s:%d", ip, vetu.sshPort),
149174
vetu.sshUser, vetu.sshPassword, "linux", runtime.GOARCH,
150175
config, true, vetu.initializeHooks(config), nil, "",
151-
map[string]string{"CIRRUS_VM_ID": vm.Ident()})
176+
map[string]string{"CIRRUS_VM_ID": vetu.vm.Ident()})
152177
if err != nil {
153178
return err
154179
}
@@ -164,8 +189,18 @@ func (vetu *Vetu) WorkingDirectory(projectDir string, dirtyMode bool) string {
164189
return platform.NewUnix().GenericWorkingDir()
165190
}
166191

167-
func (vetu *Vetu) Close(context.Context) error {
168-
return nil
192+
func (vetu *Vetu) Close(ctx context.Context) error {
193+
if vetu.vm == nil {
194+
return nil
195+
}
196+
197+
if localHub := sentry.GetHubFromContext(ctx); localHub != nil {
198+
localHub.AddBreadcrumb(&sentry.Breadcrumb{
199+
Message: fmt.Sprintf("stopping and deleting the VM %s", vetu.vm.ident),
200+
}, nil)
201+
}
202+
203+
return vetu.vm.Close()
169204
}
170205

171206
func Cleanup() error {
@@ -208,3 +243,24 @@ func (vetu *Vetu) initializeHooks(config *runconfig.RunConfig) []remoteagent.Wai
208243

209244
return hooks
210245
}
246+
247+
func (vetu *Vetu) retrieveIPLoop(ctx context.Context, vm *VM) (string, error) {
248+
for {
249+
select {
250+
case <-ctx.Done():
251+
return "", ctx.Err()
252+
case err := <-vm.ErrChan():
253+
return "", err
254+
default:
255+
time.Sleep(time.Second)
256+
}
257+
258+
ip, err := vm.RetrieveIP(ctx)
259+
if err != nil {
260+
vetu.logger.Debugf("failed to retrieve VM %s IP: %v\n", vm.Ident(), err)
261+
continue
262+
}
263+
264+
return ip, nil
265+
}
266+
}

0 commit comments

Comments
 (0)