Skip to content

Commit e70add5

Browse files
committed
fix: extend vsock ack message timeout for remote snapshot dials
Make the vsock ACK message timeout configurable. Depending on the load on the underlying host, the default 1-second timeout may be borderline for reliably establishing a connection to the microVM's vsock. Allow demux snapshotter users to tune the timeout to match their instance's performance. Signed-off-by: Austin Vazquez <[email protected]>
1 parent 569c2f1 commit e70add5

File tree

6 files changed

+30
-20
lines changed

6 files changed

+30
-20
lines changed

.buildkite/pipeline.yml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,12 +100,13 @@ steps:
100100
env:
101101
DOCKER_IMAGE_TAG: "$BUILDKITE_BUILD_NUMBER"
102102
NUMBER_OF_VMS: 10
103-
EXTRAGOARGS: "-v -count=1 -race"
103+
EXTRAGOARGS: "-v -count=1 -race -timeout 3m"
104104
FICD_DM_VOLUME_GROUP: fcci-vg
105105
artifact_paths:
106106
- "snapshotter/logs/*"
107107
command:
108108
- make -C snapshotter integ-test FICD_DM_POOL=build_${BUILDKITE_BUILD_NUMBER}_snapshotter
109+
timeout_in_minutes: 10
109110

110111
- wait
111112

snapshotter/app/service.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"strconv"
2323
"strings"
2424
"syscall"
25+
"time"
2526

2627
snapshotsapi "github.com/containerd/containerd/api/services/snapshots/v1"
2728
"github.com/containerd/containerd/contrib/snapshotservice"
@@ -175,8 +176,10 @@ func initSnapshotter(ctx context.Context, config config.Config, cache cache.Cach
175176
if err != nil {
176177
return nil, err
177178
}
179+
180+
ackMsgTimeout := time.Duration(config.Snapshotter.Dialer.AckMsgTimeoutInSeconds) * time.Second
178181
snapshotterDialer := func(ctx context.Context, namespace string) (net.Conn, error) {
179-
return vsock.DialContext(ctx, host, uint32(port), vsock.WithLogger(log.G(ctx)))
182+
return vsock.DialContext(ctx, host, uint32(port), vsock.WithLogger(log.G(ctx)), vsock.WithAckMsgTimeout(ackMsgTimeout))
180183
}
181184

182185
var metricsProxy *metrics.Proxy

snapshotter/config/config.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ type Config struct {
3232

3333
type snapshotter struct {
3434
Listener listener `toml:"listener"`
35+
Dialer dialer `toml:"dialer"`
3536
Proxy proxy `toml:"proxy"`
3637
Metrics metrics `toml:"metrics"`
3738
}
@@ -41,6 +42,10 @@ type listener struct {
4142
Address string `toml:"address" default:"/var/lib/demux-snapshotter/snapshotter.sock"`
4243
}
4344

45+
type dialer struct {
46+
AckMsgTimeoutInSeconds int `toml:"ack_msg_timeout_in_seconds" default:"1"`
47+
}
48+
4449
type proxy struct {
4550
Address address `toml:"address"`
4651
}

snapshotter/config/config_test.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,9 @@ func defaultConfig() error {
6262
Network: "unix",
6363
Address: "/var/lib/demux-snapshotter/snapshotter.sock",
6464
},
65+
Dialer: dialer{
66+
AckMsgTimeoutInSeconds: 1,
67+
},
6568
Metrics: metrics{
6669
Enable: false,
6770
},
@@ -79,6 +82,8 @@ func parseExampleConfig() error {
7982
[snapshotter.listener]
8083
network = "unix"
8184
address = "/var/lib/demux-snapshotter/non-default-snapshotter.vsock"
85+
[snapshotter.dialer]
86+
ack_msg_timeout_in_seconds = 4
8287
[snapshotter.proxy.address.resolver]
8388
type = "http"
8489
address = "localhost:10001"
@@ -96,6 +101,9 @@ func parseExampleConfig() error {
96101
Network: "unix",
97102
Address: "/var/lib/demux-snapshotter/non-default-snapshotter.vsock",
98103
},
104+
Dialer: dialer{
105+
AckMsgTimeoutInSeconds: 4,
106+
},
99107
Proxy: proxy{
100108
Address: address{
101109
Resolver: resolver{

snapshotter/service_integ_test.go

Lines changed: 8 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ import (
1818
"fmt"
1919
"strconv"
2020
"testing"
21-
"time"
2221

2322
"github.com/containerd/containerd"
2423
"github.com/containerd/containerd/namespaces"
@@ -53,23 +52,14 @@ func TestLaunchContainerWithRemoteSnapshotter_Isolated(t *testing.T) {
5352
integtest.Prepare(t, integtest.WithDefaultNetwork())
5453

5554
vmID := 0
56-
57-
testTimeout := 300 * time.Second
58-
ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
59-
defer cancel()
60-
61-
err := launchContainerWithRemoteSnapshotterInVM(ctx, strconv.Itoa(vmID))
55+
err := launchContainerWithRemoteSnapshotterInVM(context.Background(), strconv.Itoa(vmID))
6256
require.NoError(t, err)
6357
}
6458

6559
func TestLaunchMultipleContainersWithRemoteSnapshotter_Isolated(t *testing.T) {
6660
integtest.Prepare(t, integtest.WithDefaultNetwork())
6761

68-
testTimeout := 600 * time.Second
69-
ctx, cancel := context.WithTimeout(context.Background(), testTimeout)
70-
defer cancel()
71-
72-
eg, ctx := errgroup.WithContext(ctx)
62+
eg, ctx := errgroup.WithContext(context.Background())
7363

7464
numberOfVms := integtest.NumberOfVms
7565
for vmID := 0; vmID < numberOfVms; vmID++ {
@@ -126,7 +116,7 @@ func launchContainerWithRemoteSnapshotterInVM(ctx context.Context, vmID string)
126116
ContainerCount: 1,
127117
})
128118
if err != nil {
129-
return fmt.Errorf("Failed to create microVM[%s] [%v]", vmID, err)
119+
return fmt.Errorf("Failed to create VM[%s]: %w", vmID, err)
130120
}
131121
defer fcClient.StopVM(ctx, &proto.StopVMRequest{VMID: vmID})
132122

@@ -135,16 +125,16 @@ func launchContainerWithRemoteSnapshotterInVM(ctx context.Context, vmID string)
135125
Metadata: fmt.Sprintf(dockerMetadataTemplate, "ghcr.io", noAuth, noAuth),
136126
})
137127
if err != nil {
138-
return fmt.Errorf("Failed to configure VM metadata for registry resolution [%v]", err)
128+
return fmt.Errorf("Failed to configure VM metadata for registry resolution: %w", err)
139129
}
140130

141131
image, err := client.Pull(ctx, al2stargz,
142132
containerd.WithPullUnpack,
143133
containerd.WithPullSnapshotter(snapshotterName),
144-
containerd.WithImageHandlerWrapper(source.AppendDefaultLabelsHandlerWrapper(al2stargz, 10*1024*1024)),
134+
containerd.WithImageHandlerWrapper(source.AppendDefaultLabelsHandlerWrapper(al2stargz, 10*mib)),
145135
)
146136
if err != nil {
147-
return fmt.Errorf("Failed to pull image for VM: %s [%v]", vmID, err)
137+
return fmt.Errorf("Failed to pull image for VM[%s]: %w", vmID, err)
148138
}
149139
defer client.ImageService().Delete(ctx, image.Name())
150140

@@ -160,13 +150,13 @@ func launchContainerWithRemoteSnapshotterInVM(ctx context.Context, vmID string)
160150
),
161151
)
162152
if err != nil {
163-
return fmt.Errorf("Failed to create container in VM: %s, [%v]", vmID, err)
153+
return fmt.Errorf("Failed to create container in VM[%s]: %w", vmID, err)
164154
}
165155
defer container.Delete(ctx, containerd.WithSnapshotCleanup)
166156

167157
_, err = integtest.RunTask(ctx, container)
168158
if err != nil {
169-
return fmt.Errorf("Failed to run task in VM: %s [%v]", vmID, err)
159+
return fmt.Errorf("Failed to run task in VM[%s]: %w", vmID, err)
170160
}
171161
return nil
172162
}

tools/docker/entrypoint.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ EOF
4242

4343
mkdir -p /etc/demux-snapshotter /var/lib/demux-snapshotter
4444
cat > /etc/demux-snapshotter/config.toml <<EOF
45+
[snapshotter.dialer]
46+
ack_msg_timeout_in_seconds = 2
47+
4548
[snapshotter.proxy.address.resolver]
4649
type = "http"
4750
address = "http://127.0.0.1:10001"

0 commit comments

Comments
 (0)