Skip to content

Commit b876684

Browse files
authored
Merge pull request #425 from kzys/slow-boot
Make CreateVM's timeout configurable
2 parents aab00f8 + 0cd1bc3 commit b876684

File tree

8 files changed

+169
-59
lines changed

8 files changed

+169
-59
lines changed

Makefile

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,10 @@ DEFAULT_RUNC_JAILER_CONFIG_INSTALLPATH?=/etc/containerd/firecracker-runc-config.
187187
$(DEFAULT_RUNC_JAILER_CONFIG_INSTALLPATH): $(ETC_CONTAINERD) runtime/firecracker-runc-config.json.example
188188
install -D -o root -g root -m400 runtime/firecracker-runc-config.json.example $@
189189

190+
ROOTFS_SLOW_BOOT_INSTALLPATH=$(FIRECRACKER_CONTAINERD_RUNTIME_DIR)/rootfs-slow-boot.img
191+
$(ROOTFS_SLOW_BOOT_INSTALLPATH): tools/image-builder/rootfs-slow-boot.img $(FIRECRACKER_CONTAINERD_RUNTIME_DIR)
192+
install -D -o root -g root -m400 $< $@
193+
190194
ROOTFS_SLOW_REBOOT_INSTALLPATH=$(FIRECRACKER_CONTAINERD_RUNTIME_DIR)/rootfs-slow-reboot.img
191195
$(ROOTFS_SLOW_REBOOT_INSTALLPATH): tools/image-builder/rootfs-slow-reboot.img $(FIRECRACKER_CONTAINERD_RUNTIME_DIR)
192196
install -D -o root -g root -m400 $< $@
@@ -204,7 +208,7 @@ install-default-rootfs: $(DEFAULT_ROOTFS_INSTALLPATH)
204208
install-default-runc-jailer-config: $(DEFAULT_RUNC_JAILER_CONFIG_INSTALLPATH)
205209

206210
.PHONY: install-test-rootfs
207-
install-test-rootfs: $(ROOTFS_SLOW_REBOOT_INSTALLPATH)
211+
install-test-rootfs: $(ROOTFS_SLOW_BOOT_INSTALLPATH) $(ROOTFS_SLOW_REBOOT_INSTALLPATH)
208212

209213
##########################
210214
# CNI Network

firecracker-control/local.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -195,8 +195,7 @@ func (s *local) CreateVM(requestCtx context.Context, req *proto.CreateVMRequest)
195195

196196
resp, err := client.CreateVM(requestCtx, req)
197197
if err != nil {
198-
err = errors.Wrap(err, "shim CreateVM returned error")
199-
s.logger.WithError(err).Error()
198+
s.logger.WithError(err).Error("shim CreateVM returned error")
200199
return nil, err
201200
}
202201

internal/vm/vsock.go

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,7 @@ const (
3737
// VSockDial attempts to connect to the Firecracker host-side vsock at the provided unix
3838
// path and port. It will retry connect attempts if a temporary error is encountered (up
3939
// to a fixed timeout) or the provided request is canceled.
40-
func VSockDial(reqCtx context.Context, logger *logrus.Entry, udsPath string, port uint32) (net.Conn, error) {
41-
ctx, cancel := context.WithTimeout(reqCtx, vsockRetryTimeout)
42-
defer cancel()
43-
40+
func VSockDial(ctx context.Context, logger *logrus.Entry, udsPath string, port uint32) (net.Conn, error) {
4441
tickerCh := time.NewTicker(vsockRetryInterval).C
4542
var attemptCount int
4643
for {

proto/firecracker.pb.go

Lines changed: 48 additions & 39 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

proto/firecracker.proto

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@ message CreateVMRequest {
3434
bool ExitAfterAllTasksDeleted = 9;
3535

3636
JailerConfig JailerConfig = 10;
37+
38+
uint32 TimeoutSeconds = 11;
3739
}
3840

3941
message CreateVMResponse {

runtime/service.go

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -68,12 +68,18 @@ func init() {
6868
}
6969

7070
const (
71-
defaultVsockPort = 10789
72-
minVsockIOPort = uint32(11000)
73-
firecrackerStartTimeout = 5 * time.Second
74-
defaultStopVMTimeout = 5 * time.Second
75-
defaultShutdownTimeout = 5 * time.Second
76-
jailerStopTimeout = 3 * time.Second
71+
defaultVsockPort = 10789
72+
minVsockIOPort = uint32(11000)
73+
74+
// vmReadyTimeout is used to control the time all requests wait a Go channel (vmReady) before calling
75+
// Firecracker's API server. The channel is closed once the VM starts.
76+
vmReadyTimeout = 5 * time.Second
77+
78+
defaultCreateVMTimeout = 20 * time.Second
79+
defaultStopVMTimeout = 5 * time.Second
80+
defaultShutdownTimeout = 5 * time.Second
81+
82+
jailerStopTimeout = 3 * time.Second
7783

7884
// StartEventName is the topic published to when a VM starts
7985
StartEventName = "/firecracker-vm/start"
@@ -425,7 +431,7 @@ func (s *service) waitVMReady() error {
425431
select {
426432
case <-s.vmReady:
427433
return nil
428-
case <-time.After(firecrackerStartTimeout):
434+
case <-time.After(vmReadyTimeout):
429435
return status.Error(codes.DeadlineExceeded, "timed out waiting for VM start")
430436
}
431437
}
@@ -435,14 +441,21 @@ func (s *service) waitVMReady() error {
435441
func (s *service) CreateVM(requestCtx context.Context, request *proto.CreateVMRequest) (*proto.CreateVMResponse, error) {
436442
defer logPanicAndDie(s.logger)
437443

444+
timeout := defaultCreateVMTimeout
445+
if request.TimeoutSeconds > 0 {
446+
timeout = time.Duration(request.TimeoutSeconds) * time.Second
447+
}
448+
ctxWithTimeout, cancel := context.WithTimeout(requestCtx, timeout)
449+
defer cancel()
450+
438451
var (
439452
err error
440453
createRan bool
441454
resp proto.CreateVMResponse
442455
)
443456

444457
s.vmStartOnce.Do(func() {
445-
err = s.createVM(requestCtx, request)
458+
err = s.createVM(ctxWithTimeout, request)
446459
createRan = true
447460
})
448461

@@ -453,9 +466,13 @@ func (s *service) CreateVM(requestCtx context.Context, request *proto.CreateVMRe
453466
// If we failed to create the VM, we have no point in existing anymore, so shutdown
454467
if err != nil {
455468
s.shimCancel()
456-
err = errors.Wrap(err, "failed to create VM")
457-
s.logger.WithError(err).Error()
458-
return nil, err
469+
470+
s.logger.WithError(err).Error("failed to create VM")
471+
472+
if errors.Cause(err) == context.DeadlineExceeded {
473+
return nil, status.Errorf(codes.DeadlineExceeded, "VM %q didn't start within %s: %s", request.VMID, timeout, err)
474+
}
475+
return nil, errors.Wrap(err, "failed to create VM")
459476
}
460477

461478
// creating the VM succeeded, setup monitors and publish events to celebrate

runtime/service_integ_test.go

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1047,6 +1047,7 @@ func TestDriveMount_Isolated(t *testing.T) {
10471047
UID: 300000,
10481048
GID: 300000,
10491049
},
1050+
TimeoutSeconds: 60,
10501051
})
10511052
require.NoError(t, err, "failed to create vm")
10521053

@@ -1787,3 +1788,77 @@ func findProcWithName(name string) func(context.Context, *process.Process) (bool
17871788
return filepath.Base(processExecutable) == name, nil
17881789
}
17891790
}
1791+
1792+
func TestCreateVM_Isolated(t *testing.T) {
1793+
prepareIntegTest(t)
1794+
1795+
client, err := containerd.New(containerdSockPath, containerd.WithDefaultRuntime(firecrackerRuntime))
1796+
require.NoError(t, err, "unable to create client to containerd service at %s, is containerd running?", containerdSockPath)
1797+
defer client.Close()
1798+
1799+
ctx := namespaces.WithNamespace(context.Background(), "default")
1800+
1801+
pluginClient, err := ttrpcutil.NewClient(containerdSockPath + ".ttrpc")
1802+
require.NoError(t, err, "failed to create ttrpc client")
1803+
1804+
fcClient := fccontrol.NewFirecrackerClient(pluginClient.Client())
1805+
1806+
subtests := []struct {
1807+
name string
1808+
request proto.CreateVMRequest
1809+
validate func(*testing.T, error)
1810+
}{
1811+
{
1812+
name: "Happy Case",
1813+
request: proto.CreateVMRequest{},
1814+
validate: func(t *testing.T, err error) {
1815+
require.NoError(t, err)
1816+
},
1817+
},
1818+
{
1819+
name: "Slow Root FS",
1820+
request: proto.CreateVMRequest{
1821+
RootDrive: &proto.FirecrackerRootDrive{
1822+
HostPath: "/var/lib/firecracker-containerd/runtime/rootfs-slow-boot.img",
1823+
},
1824+
},
1825+
validate: func(t *testing.T, err error) {
1826+
require.Error(t, err)
1827+
assert.Equal(t, codes.DeadlineExceeded, status.Code(err))
1828+
assert.Contains(t, err.Error(), "didn't start within 20s")
1829+
},
1830+
},
1831+
{
1832+
name: "Slow Root FS and Longer Timeout",
1833+
request: proto.CreateVMRequest{
1834+
RootDrive: &proto.FirecrackerRootDrive{
1835+
HostPath: "/var/lib/firecracker-containerd/runtime/rootfs-slow-boot.img",
1836+
},
1837+
TimeoutSeconds: 60,
1838+
},
1839+
validate: func(t *testing.T, err error) {
1840+
require.NoError(t, err)
1841+
},
1842+
},
1843+
}
1844+
1845+
for _, subtest := range subtests {
1846+
request := subtest.request
1847+
validate := subtest.validate
1848+
1849+
t.Run(subtest.name, func(t *testing.T) {
1850+
request.VMID = testNameToVMID(t.Name())
1851+
1852+
_, err = fcClient.CreateVM(ctx, &request)
1853+
validate(t, err)
1854+
1855+
if err != nil {
1856+
// No VM to stop.
1857+
return
1858+
}
1859+
1860+
_, err = fcClient.StopVM(ctx, &proto.StopVMRequest{VMID: request.VMID})
1861+
require.Equal(t, status.Code(err), codes.OK)
1862+
})
1863+
}
1864+
}

tools/image-builder/Makefile

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ fi
3737
touch --reference=debootstrap_stamp --no-create "$(WORKDIR)"
3838
endef
3939

40-
all: rootfs.img rootfs-slow-reboot.img
40+
all: rootfs.img rootfs-slow-boot.img rootfs-slow-reboot.img
4141

4242
$(WORKDIR):
4343
mkdir $(WORKDIR)
@@ -67,6 +67,13 @@ endif
6767
rootfs.img: files_common_stamp files_debootstrap_stamp files_ephemeral_stamp
6868
mksquashfs "$(WORKDIR)" rootfs.img -noappend
6969

70+
# Intentionally break the rootfs to simulate the case where CreateVM is taking longer than the minimal timeout
71+
rootfs-slow-boot.img: files_common_stamp files_debootstrap_stamp files_ephemeral_stamp
72+
rm -fr tmp/$@
73+
cp -a "$(WORKDIR)" tmp/$@
74+
echo 'ExecStartPre=/bin/sleep 30' >> tmp/$@/etc/systemd/system/firecracker-agent.service
75+
mksquashfs tmp/$@ $@ -noappend
76+
7077
# Intentionally break the rootfs to simulate the case where StopVM is taking longer than the minimal timeout
7178
rootfs-slow-reboot.img: files_common_stamp files_debootstrap_stamp files_ephemeral_stamp
7279
rm -fr tmp/$@
@@ -98,7 +105,7 @@ builder_stamp:
98105
clean:
99106
-rm -f *stamp
100107
if [ $(UID) -eq 0 ]; then \
101-
rm -f rootfs.img rootfs-slow-reboot.img ;\
108+
rm -f rootfs.img rootfs-slow-boot.img rootfs-slow-reboot.img ;\
102109
else \
103110
$(MAKE) clean-in-docker ;\
104111
fi

0 commit comments

Comments
 (0)