Skip to content

Commit 66951e2

Browse files
committed
Make CreateVM's timeout configurable
As like StopVM, CreateVM's timeout should be configurable to accomodate other root fs images and slower environments. Fixes firecracker-microvm#423. Signed-off-by: Kazuyoshi Kato <[email protected]>
1 parent 1e7b50e commit 66951e2

File tree

8 files changed

+168
-59
lines changed

8 files changed

+168
-59
lines changed

Makefile

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,10 @@ DEFAULT_RUNC_JAILER_CONFIG_INSTALLPATH?=/etc/containerd/firecracker-runc-config.
187187
$(DEFAULT_RUNC_JAILER_CONFIG_INSTALLPATH): $(ETC_CONTAINERD) runtime/firecracker-runc-config.json.example
188188
install -D -o root -g root -m400 runtime/firecracker-runc-config.json.example $@
189189

190+
ROOTFS_SLOW_BOOT_INSTALLPATH=$(FIRECRACKER_CONTAINERD_RUNTIME_DIR)/rootfs-slow-boot.img
191+
$(ROOTFS_SLOW_BOOT_INSTALLPATH): tools/image-builder/rootfs-slow-boot.img $(FIRECRACKER_CONTAINERD_RUNTIME_DIR)
192+
install -D -o root -g root -m400 $< $@
193+
190194
ROOTFS_SLOW_REBOOT_INSTALLPATH=$(FIRECRACKER_CONTAINERD_RUNTIME_DIR)/rootfs-slow-reboot.img
191195
$(ROOTFS_SLOW_REBOOT_INSTALLPATH): tools/image-builder/rootfs-slow-reboot.img $(FIRECRACKER_CONTAINERD_RUNTIME_DIR)
192196
install -D -o root -g root -m400 $< $@
@@ -204,7 +208,7 @@ install-default-rootfs: $(DEFAULT_ROOTFS_INSTALLPATH)
204208
install-default-runc-jailer-config: $(DEFAULT_RUNC_JAILER_CONFIG_INSTALLPATH)
205209

206210
.PHONY: install-test-rootfs
207-
install-test-rootfs: $(ROOTFS_SLOW_REBOOT_INSTALLPATH)
211+
install-test-rootfs: $(ROOTFS_SLOW_BOOT_INSTALLPATH) $(ROOTFS_SLOW_REBOOT_INSTALLPATH)
208212

209213
##########################
210214
# CNI Network

firecracker-control/local.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,8 +195,7 @@ func (s *local) CreateVM(requestCtx context.Context, req *proto.CreateVMRequest)
195195

196196
resp, err := client.CreateVM(requestCtx, req)
197197
if err != nil {
198-
err = errors.Wrap(err, "shim CreateVM returned error")
199-
s.logger.WithError(err).Error()
198+
s.logger.WithError(err).Error("shim CreateVM returned error")
200199
return nil, err
201200
}
202201

internal/vm/vsock.go

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,7 @@ const (
3737
// VSockDial attempts to connect to the Firecracker host-side vsock at the provided unix
3838
// path and port. It will retry connect attempts if a temporary error is encountered (up
3939
// to a fixed timeout) or the provided request is canceled.
40-
func VSockDial(reqCtx context.Context, logger *logrus.Entry, udsPath string, port uint32) (net.Conn, error) {
41-
ctx, cancel := context.WithTimeout(reqCtx, vsockRetryTimeout)
42-
defer cancel()
43-
40+
func VSockDial(ctx context.Context, logger *logrus.Entry, udsPath string, port uint32) (net.Conn, error) {
4441
tickerCh := time.NewTicker(vsockRetryInterval).C
4542
var attemptCount int
4643
for {

proto/firecracker.pb.go

Lines changed: 48 additions & 39 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

proto/firecracker.proto

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ message CreateVMRequest {
3434
bool ExitAfterAllTasksDeleted = 9;
3535

3636
JailerConfig JailerConfig = 10;
37+
38+
uint32 TimeoutSeconds = 11;
3739
}
3840

3941
message CreateVMResponse {

runtime/service.go

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -68,12 +68,18 @@ func init() {
6868
}
6969

7070
const (
71-
defaultVsockPort = 10789
72-
minVsockIOPort = uint32(11000)
73-
firecrackerStartTimeout = 5 * time.Second
74-
defaultStopVMTimeout = 5 * time.Second
75-
defaultShutdownTimeout = 5 * time.Second
76-
jailerStopTimeout = 3 * time.Second
71+
defaultVsockPort = 10789
72+
minVsockIOPort = uint32(11000)
73+
74+
// vmReadyTimeout is used to control the time all requests wait a Go channel (vmReady) before calling
75+
// Firecracker's API server. The channel is closed once the VM starts.
76+
vmReadyTimeout = 5 * time.Second
77+
78+
defaultCreateVMTimeout = 20 * time.Second
79+
defaultStopVMTimeout = 5 * time.Second
80+
defaultShutdownTimeout = 5 * time.Second
81+
82+
jailerStopTimeout = 3 * time.Second
7783

7884
// StartEventName is the topic published to when a VM starts
7985
StartEventName = "/firecracker-vm/start"
@@ -425,7 +431,7 @@ func (s *service) waitVMReady() error {
425431
select {
426432
case <-s.vmReady:
427433
return nil
428-
case <-time.After(firecrackerStartTimeout):
434+
case <-time.After(vmReadyTimeout):
429435
return status.Error(codes.DeadlineExceeded, "timed out waiting for VM start")
430436
}
431437
}
@@ -435,14 +441,21 @@ func (s *service) waitVMReady() error {
435441
func (s *service) CreateVM(requestCtx context.Context, request *proto.CreateVMRequest) (*proto.CreateVMResponse, error) {
436442
defer logPanicAndDie(s.logger)
437443

444+
timeout := defaultCreateVMTimeout
445+
if request.TimeoutSeconds > 0 {
446+
timeout = time.Duration(request.TimeoutSeconds) * time.Second
447+
}
448+
ctxWithTimeout, cancel := context.WithTimeout(requestCtx, timeout)
449+
defer cancel()
450+
438451
var (
439452
err error
440453
createRan bool
441454
resp proto.CreateVMResponse
442455
)
443456

444457
s.vmStartOnce.Do(func() {
445-
err = s.createVM(requestCtx, request)
458+
err = s.createVM(ctxWithTimeout, request)
446459
createRan = true
447460
})
448461

@@ -453,9 +466,13 @@ func (s *service) CreateVM(requestCtx context.Context, request *proto.CreateVMRe
453466
// If we failed to create the VM, we have no point in existing anymore, so shutdown
454467
if err != nil {
455468
s.shimCancel()
456-
err = errors.Wrap(err, "failed to create VM")
457-
s.logger.WithError(err).Error()
458-
return nil, err
469+
470+
s.logger.WithError(err).Error("failed to create VM")
471+
472+
if errors.Cause(err) == context.DeadlineExceeded {
473+
return nil, status.Errorf(codes.DeadlineExceeded, "VM %q didn't start within %s: %s", request.VMID, timeout, err)
474+
}
475+
return nil, errors.Wrap(err, "failed to create VM")
459476
}
460477

461478
// creating the VM succeeded, setup monitors and publish events to celebrate

runtime/service_integ_test.go

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1787,3 +1787,77 @@ func findProcWithName(name string) func(context.Context, *process.Process) (bool
17871787
return filepath.Base(processExecutable) == name, nil
17881788
}
17891789
}
1790+
1791+
func TestCreateVM_Isolated(t *testing.T) {
1792+
prepareIntegTest(t)
1793+
1794+
client, err := containerd.New(containerdSockPath, containerd.WithDefaultRuntime(firecrackerRuntime))
1795+
require.NoError(t, err, "unable to create client to containerd service at %s, is containerd running?", containerdSockPath)
1796+
defer client.Close()
1797+
1798+
ctx := namespaces.WithNamespace(context.Background(), "default")
1799+
1800+
pluginClient, err := ttrpcutil.NewClient(containerdSockPath + ".ttrpc")
1801+
require.NoError(t, err, "failed to create ttrpc client")
1802+
1803+
fcClient := fccontrol.NewFirecrackerClient(pluginClient.Client())
1804+
1805+
subtests := []struct {
1806+
name string
1807+
request proto.CreateVMRequest
1808+
validate func(*testing.T, error)
1809+
}{
1810+
{
1811+
name: "Happy Case",
1812+
request: proto.CreateVMRequest{},
1813+
validate: func(t *testing.T, err error) {
1814+
require.NoError(t, err)
1815+
},
1816+
},
1817+
{
1818+
name: "Slow Root FS",
1819+
request: proto.CreateVMRequest{
1820+
RootDrive: &proto.FirecrackerRootDrive{
1821+
HostPath: "/var/lib/firecracker-containerd/runtime/rootfs-slow-boot.img",
1822+
},
1823+
},
1824+
validate: func(t *testing.T, err error) {
1825+
require.Error(t, err)
1826+
assert.Equal(t, codes.DeadlineExceeded, status.Code(err))
1827+
assert.Contains(t, err.Error(), "didn't start within 20s")
1828+
},
1829+
},
1830+
{
1831+
name: "Slow Root FS and Longer Timeout",
1832+
request: proto.CreateVMRequest{
1833+
RootDrive: &proto.FirecrackerRootDrive{
1834+
HostPath: "/var/lib/firecracker-containerd/runtime/rootfs-slow-boot.img",
1835+
},
1836+
TimeoutSeconds: 60,
1837+
},
1838+
validate: func(t *testing.T, err error) {
1839+
require.NoError(t, err)
1840+
},
1841+
},
1842+
}
1843+
1844+
for _, subtest := range subtests {
1845+
request := subtest.request
1846+
validate := subtest.validate
1847+
1848+
t.Run(subtest.name, func(t *testing.T) {
1849+
request.VMID = testNameToVMID(t.Name())
1850+
1851+
_, err = fcClient.CreateVM(ctx, &request)
1852+
validate(t, err)
1853+
1854+
if err != nil {
1855+
// No VM to stop.
1856+
return
1857+
}
1858+
1859+
_, err = fcClient.StopVM(ctx, &proto.StopVMRequest{VMID: request.VMID})
1860+
require.Equal(t, status.Code(err), codes.OK)
1861+
})
1862+
}
1863+
}

tools/image-builder/Makefile

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ fi
3737
touch --reference=debootstrap_stamp --no-create "$(WORKDIR)"
3838
endef
3939

40-
all: rootfs.img rootfs-slow-reboot.img
40+
all: rootfs.img rootfs-slow-boot.img rootfs-slow-reboot.img
4141

4242
$(WORKDIR):
4343
mkdir $(WORKDIR)
@@ -67,6 +67,13 @@ endif
6767
rootfs.img: files_common_stamp files_debootstrap_stamp files_ephemeral_stamp
6868
mksquashfs "$(WORKDIR)" rootfs.img -noappend
6969

70+
# Intentionally break the rootfs to simulate the case where CreateVM is taking longer than the minimal timeout
71+
rootfs-slow-boot.img: files_common_stamp files_debootstrap_stamp files_ephemeral_stamp
72+
rm -fr tmp/$@
73+
cp -a "$(WORKDIR)" tmp/$@
74+
echo 'ExecStartPre=/bin/sleep 30' >> tmp/$@/etc/systemd/system/firecracker-agent.service
75+
mksquashfs tmp/$@ $@ -noappend
76+
7077
# Intentionally break the rootfs to simulate the case where StopVM is taking longer than the minimal timeout
7178
rootfs-slow-reboot.img: files_common_stamp files_debootstrap_stamp files_ephemeral_stamp
7279
rm -fr tmp/$@
@@ -98,7 +105,7 @@ builder_stamp:
98105
clean:
99106
-rm -f *stamp
100107
if [ $(UID) -eq 0 ]; then \
101-
rm -f rootfs.img rootfs-slow-reboot.img ;\
108+
rm -f rootfs.img rootfs-slow-boot.img rootfs-slow-reboot.img ;\
102109
else \
103110
$(MAKE) clean-in-docker ;\
104111
fi

0 commit comments

Comments
 (0)