Skip to content

Commit 66951e2

Browse files
committed
Make CreateVM's timeout configurable
As like StopVM, CreateVM's timeout should be configurable to accomodate other root fs images and slower environments. Fixes #423. Signed-off-by: Kazuyoshi Kato <[email protected]>
1 parent 1e7b50e commit 66951e2

File tree

8 files changed

+168
-59
lines changed

8 files changed

+168
-59
lines changed

Makefile

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,10 @@ DEFAULT_RUNC_JAILER_CONFIG_INSTALLPATH?=/etc/containerd/firecracker-runc-config.
187187
$(DEFAULT_RUNC_JAILER_CONFIG_INSTALLPATH): $(ETC_CONTAINERD) runtime/firecracker-runc-config.json.example
188188
install -D -o root -g root -m400 runtime/firecracker-runc-config.json.example $@
189189

190+
ROOTFS_SLOW_BOOT_INSTALLPATH=$(FIRECRACKER_CONTAINERD_RUNTIME_DIR)/rootfs-slow-boot.img
191+
$(ROOTFS_SLOW_BOOT_INSTALLPATH): tools/image-builder/rootfs-slow-boot.img $(FIRECRACKER_CONTAINERD_RUNTIME_DIR)
192+
install -D -o root -g root -m400 $< $@
193+
190194
ROOTFS_SLOW_REBOOT_INSTALLPATH=$(FIRECRACKER_CONTAINERD_RUNTIME_DIR)/rootfs-slow-reboot.img
191195
$(ROOTFS_SLOW_REBOOT_INSTALLPATH): tools/image-builder/rootfs-slow-reboot.img $(FIRECRACKER_CONTAINERD_RUNTIME_DIR)
192196
install -D -o root -g root -m400 $< $@
@@ -204,7 +208,7 @@ install-default-rootfs: $(DEFAULT_ROOTFS_INSTALLPATH)
204208
install-default-runc-jailer-config: $(DEFAULT_RUNC_JAILER_CONFIG_INSTALLPATH)
205209

206210
.PHONY: install-test-rootfs
207-
install-test-rootfs: $(ROOTFS_SLOW_REBOOT_INSTALLPATH)
211+
install-test-rootfs: $(ROOTFS_SLOW_BOOT_INSTALLPATH) $(ROOTFS_SLOW_REBOOT_INSTALLPATH)
208212

209213
##########################
210214
# CNI Network

firecracker-control/local.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,8 +195,7 @@ func (s *local) CreateVM(requestCtx context.Context, req *proto.CreateVMRequest)
195195

196196
resp, err := client.CreateVM(requestCtx, req)
197197
if err != nil {
198-
err = errors.Wrap(err, "shim CreateVM returned error")
199-
s.logger.WithError(err).Error()
198+
s.logger.WithError(err).Error("shim CreateVM returned error")
200199
return nil, err
201200
}
202201

internal/vm/vsock.go

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,7 @@ const (
3737
// VSockDial attempts to connect to the Firecracker host-side vsock at the provided unix
3838
// path and port. It will retry connect attempts if a temporary error is encountered (up
3939
// to a fixed timeout) or the provided request is canceled.
40-
func VSockDial(reqCtx context.Context, logger *logrus.Entry, udsPath string, port uint32) (net.Conn, error) {
41-
ctx, cancel := context.WithTimeout(reqCtx, vsockRetryTimeout)
42-
defer cancel()
43-
40+
func VSockDial(ctx context.Context, logger *logrus.Entry, udsPath string, port uint32) (net.Conn, error) {
4441
tickerCh := time.NewTicker(vsockRetryInterval).C
4542
var attemptCount int
4643
for {

proto/firecracker.pb.go

Lines changed: 48 additions & 39 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

proto/firecracker.proto

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ message CreateVMRequest {
3434
bool ExitAfterAllTasksDeleted = 9;
3535

3636
JailerConfig JailerConfig = 10;
37+
38+
uint32 TimeoutSeconds = 11;
3739
}
3840

3941
message CreateVMResponse {

runtime/service.go

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -68,12 +68,18 @@ func init() {
6868
}
6969

7070
const (
71-
defaultVsockPort = 10789
72-
minVsockIOPort = uint32(11000)
73-
firecrackerStartTimeout = 5 * time.Second
74-
defaultStopVMTimeout = 5 * time.Second
75-
defaultShutdownTimeout = 5 * time.Second
76-
jailerStopTimeout = 3 * time.Second
71+
defaultVsockPort = 10789
72+
minVsockIOPort = uint32(11000)
73+
74+
// vmReadyTimeout is used to control the time all requests wait a Go channel (vmReady) before calling
75+
// Firecracker's API server. The channel is closed once the VM starts.
76+
vmReadyTimeout = 5 * time.Second
77+
78+
defaultCreateVMTimeout = 20 * time.Second
79+
defaultStopVMTimeout = 5 * time.Second
80+
defaultShutdownTimeout = 5 * time.Second
81+
82+
jailerStopTimeout = 3 * time.Second
7783

7884
// StartEventName is the topic published to when a VM starts
7985
StartEventName = "/firecracker-vm/start"
@@ -425,7 +431,7 @@ func (s *service) waitVMReady() error {
425431
select {
426432
case <-s.vmReady:
427433
return nil
428-
case <-time.After(firecrackerStartTimeout):
434+
case <-time.After(vmReadyTimeout):
429435
return status.Error(codes.DeadlineExceeded, "timed out waiting for VM start")
430436
}
431437
}
@@ -435,14 +441,21 @@ func (s *service) waitVMReady() error {
435441
func (s *service) CreateVM(requestCtx context.Context, request *proto.CreateVMRequest) (*proto.CreateVMResponse, error) {
436442
defer logPanicAndDie(s.logger)
437443

444+
timeout := defaultCreateVMTimeout
445+
if request.TimeoutSeconds > 0 {
446+
timeout = time.Duration(request.TimeoutSeconds) * time.Second
447+
}
448+
ctxWithTimeout, cancel := context.WithTimeout(requestCtx, timeout)
449+
defer cancel()
450+
438451
var (
439452
err error
440453
createRan bool
441454
resp proto.CreateVMResponse
442455
)
443456

444457
s.vmStartOnce.Do(func() {
445-
err = s.createVM(requestCtx, request)
458+
err = s.createVM(ctxWithTimeout, request)
446459
createRan = true
447460
})
448461

@@ -453,9 +466,13 @@ func (s *service) CreateVM(requestCtx context.Context, request *proto.CreateVMRe
453466
// If we failed to create the VM, we have no point in existing anymore, so shutdown
454467
if err != nil {
455468
s.shimCancel()
456-
err = errors.Wrap(err, "failed to create VM")
457-
s.logger.WithError(err).Error()
458-
return nil, err
469+
470+
s.logger.WithError(err).Error("failed to create VM")
471+
472+
if errors.Cause(err) == context.DeadlineExceeded {
473+
return nil, status.Errorf(codes.DeadlineExceeded, "VM %q didn't start within %s: %s", request.VMID, timeout, err)
474+
}
475+
return nil, errors.Wrap(err, "failed to create VM")
459476
}
460477

461478
// creating the VM succeeded, setup monitors and publish events to celebrate

runtime/service_integ_test.go

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1787,3 +1787,77 @@ func findProcWithName(name string) func(context.Context, *process.Process) (bool
17871787
return filepath.Base(processExecutable) == name, nil
17881788
}
17891789
}
1790+
1791+
func TestCreateVM_Isolated(t *testing.T) {
1792+
prepareIntegTest(t)
1793+
1794+
client, err := containerd.New(containerdSockPath, containerd.WithDefaultRuntime(firecrackerRuntime))
1795+
require.NoError(t, err, "unable to create client to containerd service at %s, is containerd running?", containerdSockPath)
1796+
defer client.Close()
1797+
1798+
ctx := namespaces.WithNamespace(context.Background(), "default")
1799+
1800+
pluginClient, err := ttrpcutil.NewClient(containerdSockPath + ".ttrpc")
1801+
require.NoError(t, err, "failed to create ttrpc client")
1802+
1803+
fcClient := fccontrol.NewFirecrackerClient(pluginClient.Client())
1804+
1805+
subtests := []struct {
1806+
name string
1807+
request proto.CreateVMRequest
1808+
validate func(*testing.T, error)
1809+
}{
1810+
{
1811+
name: "Happy Case",
1812+
request: proto.CreateVMRequest{},
1813+
validate: func(t *testing.T, err error) {
1814+
require.NoError(t, err)
1815+
},
1816+
},
1817+
{
1818+
name: "Slow Root FS",
1819+
request: proto.CreateVMRequest{
1820+
RootDrive: &proto.FirecrackerRootDrive{
1821+
HostPath: "/var/lib/firecracker-containerd/runtime/rootfs-slow-boot.img",
1822+
},
1823+
},
1824+
validate: func(t *testing.T, err error) {
1825+
require.Error(t, err)
1826+
assert.Equal(t, codes.DeadlineExceeded, status.Code(err))
1827+
assert.Contains(t, err.Error(), "didn't start within 20s")
1828+
},
1829+
},
1830+
{
1831+
name: "Slow Root FS and Longer Timeout",
1832+
request: proto.CreateVMRequest{
1833+
RootDrive: &proto.FirecrackerRootDrive{
1834+
HostPath: "/var/lib/firecracker-containerd/runtime/rootfs-slow-boot.img",
1835+
},
1836+
TimeoutSeconds: 60,
1837+
},
1838+
validate: func(t *testing.T, err error) {
1839+
require.NoError(t, err)
1840+
},
1841+
},
1842+
}
1843+
1844+
for _, subtest := range subtests {
1845+
request := subtest.request
1846+
validate := subtest.validate
1847+
1848+
t.Run(subtest.name, func(t *testing.T) {
1849+
request.VMID = testNameToVMID(t.Name())
1850+
1851+
_, err = fcClient.CreateVM(ctx, &request)
1852+
validate(t, err)
1853+
1854+
if err != nil {
1855+
// No VM to stop.
1856+
return
1857+
}
1858+
1859+
_, err = fcClient.StopVM(ctx, &proto.StopVMRequest{VMID: request.VMID})
1860+
require.Equal(t, status.Code(err), codes.OK)
1861+
})
1862+
}
1863+
}

tools/image-builder/Makefile

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ fi
3737
touch --reference=debootstrap_stamp --no-create "$(WORKDIR)"
3838
endef
3939

40-
all: rootfs.img rootfs-slow-reboot.img
40+
all: rootfs.img rootfs-slow-boot.img rootfs-slow-reboot.img
4141

4242
$(WORKDIR):
4343
mkdir $(WORKDIR)
@@ -67,6 +67,13 @@ endif
6767
rootfs.img: files_common_stamp files_debootstrap_stamp files_ephemeral_stamp
6868
mksquashfs "$(WORKDIR)" rootfs.img -noappend
6969

70+
# Intentionally break the rootfs to simulate the case where CreateVM is taking longer than the minimal timeout
71+
rootfs-slow-boot.img: files_common_stamp files_debootstrap_stamp files_ephemeral_stamp
72+
rm -fr tmp/$@
73+
cp -a "$(WORKDIR)" tmp/$@
74+
echo 'ExecStartPre=/bin/sleep 30' >> tmp/$@/etc/systemd/system/firecracker-agent.service
75+
mksquashfs tmp/$@ $@ -noappend
76+
7077
# Intentionally break the rootfs to simulate the case where StopVM is taking longer than the minimal timeout
7178
rootfs-slow-reboot.img: files_common_stamp files_debootstrap_stamp files_ephemeral_stamp
7279
rm -fr tmp/$@
@@ -98,7 +105,7 @@ builder_stamp:
98105
clean:
99106
-rm -f *stamp
100107
if [ $(UID) -eq 0 ]; then \
101-
rm -f rootfs.img rootfs-slow-reboot.img ;\
108+
rm -f rootfs.img rootfs-slow-boot.img rootfs-slow-reboot.img ;\
102109
else \
103110
$(MAKE) clean-in-docker ;\
104111
fi

0 commit comments

Comments
 (0)