Skip to content

Commit 98bd57a

Browse files
authored
[cloud] Add retry when trying to connect to cloud vm for first time (#581)
## Summary `copyConfigFileToVM` is the first command we run on the VM. Sometimes it takes fly.io a few seconds to propagate DNS, especially if the VM is located in a different region than the proxy (this can happen if the gateway is in a different region from the proxy). We retry a few times to avoid failing the command. ## How was it tested? Ran `DEVBOX_DEBUG=1 devbox cloud shell` many times.
1 parent 9761a9a commit 98bd57a

File tree

2 files changed

+24
-1
lines changed

2 files changed

+24
-1
lines changed

internal/cloud/cloud.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,11 @@ func copyConfigFileToVM(hostname, username, projectDir, pathInVM string) error {
338338

339339
// Ensure the devbox-project's directory exists in the VM
340340
mkdirCmd := openssh.Command(username, hostname)
341-
_, err := mkdirCmd.ExecRemote(fmt.Sprintf(`mkdir -p "%s"`, pathInVM))
341+
// This is the first command we run on the VM. Sometimes it takes fly.io a few seconds
342+
// to propagate DNS, especially if the VM is located in a different region than
343+
// the proxy (this can happen if the gateway is in a different region to proxy)
344+
// We retry a few times to avoid failing the command.
345+
_, err := mkdirCmd.ExecRemoteWithRetry(fmt.Sprintf(`mkdir -p "%s"`, pathInVM), 5, 4)
342346
if err != nil {
343347
debug.Log("error copying config file to VM: %v", err)
344348
return errors.WithStack(err)

internal/cloud/openssh/cmd.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,13 @@ import (
88
"fmt"
99
"io"
1010
"io/fs"
11+
"math"
1112
"net"
1213
"os"
1314
"os/exec"
1415
"path/filepath"
1516
"strconv"
17+
"time"
1618

1719
"go.jetpack.io/devbox/internal/debug"
1820
)
@@ -66,6 +68,23 @@ func (c *Cmd) ExecRemote(cmd string) ([]byte, error) {
6668
return stdout.Bytes(), nil
6769
}
6870

71+
// ExecRemoteWithRetry runs the given command on the remote host, retrying
72+
// with an exponential backoff if the command fails. maxWait is the maximum
73+
// seconds we wait in between retries.
74+
func (c *Cmd) ExecRemoteWithRetry(cmd string, retries, maxWait int) ([]byte, error) {
75+
var err error
76+
var stdout []byte
77+
for i := 0; i < (retries + 1); i++ {
78+
if stdout, err = c.ExecRemote(cmd); err == nil {
79+
break
80+
}
81+
wait := int(math.Min(float64(maxWait), math.Pow(2, float64(i))))
82+
debug.Log("Error: %v Retrying ExecRemote in %d seconds", err, wait)
83+
time.Sleep(time.Duration(wait) * time.Second)
84+
}
85+
return stdout, err
86+
}
87+
6988
func (c *Cmd) cmd(sshArgs ...string) *exec.Cmd {
7089
host, port := splitHostPort(c.DestinationAddr)
7190
cmd := exec.Command("ssh", "-l", c.Username)

0 commit comments

Comments
 (0)