Skip to content

Commit 9d17bdb

Browse files
sipsmaaustinvazquez
authored andcommitted
Upgrade to Firecracker v0.18.0.
This consists of: * Upgrading to the new Go SDK version. * Using the new vsock interface with unix-domain socket backend on the host. Signed-off-by: Erik Sipsma <[email protected]>
1 parent 6fc55ba commit 9d17bdb

File tree

1 file changed

+237
-75
lines changed

1 file changed

+237
-75
lines changed

internal/vm/vsock.go

Lines changed: 237 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -15,125 +15,157 @@ package vm
1515

1616
import (
1717
"context"
18+
"fmt"
1819
"net"
19-
"strings"
2020
"time"
2121

2222
"github.com/mdlayher/vsock"
23+
"github.com/pkg/errors"
2324
"github.com/sirupsen/logrus"
2425
)
2526

2627
const (
27-
vsockConnectTimeout = 20 * time.Second
28+
vsockRetryTimeout = 20 * time.Second
29+
vsockRetryInterval = 100 * time.Millisecond
30+
unixDialTimeout = 100 * time.Millisecond
31+
vsockConnectMsgTimeout = 100 * time.Millisecond
32+
vsockAckMsgTimeout = 1 * time.Second
2833
)
2934

30-
// VSockDial attempts to connect to a vsock listener at the provided cid and port with a hardcoded number
31-
// of retries.
32-
func VSockDial(reqCtx context.Context, logger *logrus.Entry, contextID, port uint32) (net.Conn, error) {
33-
// Retries occur every 100ms up to vsockConnectTimeout
34-
const retryInterval = 100 * time.Millisecond
35-
ctx, cancel := context.WithTimeout(reqCtx, vsockConnectTimeout)
35+
// VSockDial attempts to connect to the Firecracker host-side vsock at the provided unix
36+
// path and port. It will retry connect attempts if a temporary error is encountered (up
37+
// to a fixed timeout) or the provided request is canceled.
38+
func VSockDial(reqCtx context.Context, logger *logrus.Entry, udsPath string, port uint32) (net.Conn, error) {
39+
ctx, cancel := context.WithTimeout(reqCtx, vsockRetryTimeout)
3640
defer cancel()
3741

42+
tickerCh := time.NewTicker(vsockRetryInterval).C
3843
var attemptCount int
39-
for range time.NewTicker(retryInterval).C {
44+
for {
4045
attemptCount++
41-
logger = logger.WithField("attempt", attemptCount)
46+
logger := logger.WithField("attempt", attemptCount)
4247

4348
select {
4449
case <-ctx.Done():
4550
return nil, ctx.Err()
46-
default:
47-
conn, err := vsock.Dial(contextID, port)
48-
if err == nil {
49-
logger.WithField("connection", conn).Debug("vsock dial succeeded")
50-
return conn, nil
51-
}
52-
53-
// ENXIO and ECONNRESET can be returned while the VM+agent are still in the midst of booting
54-
if isTemporaryNetErr(err) || isENXIO(err) || isECONNRESET(err) {
55-
logger.WithError(err).Debug("temporary vsock dial failure")
51+
case <-tickerCh:
52+
conn, err := tryConnect(logger, udsPath, port)
53+
if isTemporaryNetErr(err) {
54+
err = errors.Wrap(err, "temporary vsock dial failure")
55+
logger.WithError(err).Debug()
5656
continue
57+
} else if err != nil {
58+
err = errors.Wrap(err, "non-temporary vsock dial failure")
59+
logger.WithError(err).Error()
60+
return nil, err
5761
}
5862

59-
logger.WithError(err).Error("non-temporary vsock dial failure")
60-
return nil, err
63+
return conn, nil
6164
}
6265
}
63-
64-
panic("unreachable code") // appeases the compiler, which doesn't know the for loop is infinite
6566
}
6667

67-
// VSockDialConnector provides an IOConnector interface to the VSockDial function.
68-
func VSockDialConnector(contextID, port uint32) IOConnector {
69-
return func(procCtx context.Context, logger *logrus.Entry) <-chan IOConnectorResult {
70-
returnCh := make(chan IOConnectorResult)
71-
72-
go func() {
73-
defer close(returnCh)
74-
75-
conn, err := VSockDial(procCtx, logger, contextID, port)
76-
returnCh <- IOConnectorResult{
77-
ReadWriteCloser: conn,
78-
Err: err,
79-
}
80-
}()
81-
82-
return returnCh
83-
}
68+
type vsockListener struct {
69+
listener net.Listener
70+
port uint32
71+
ctx context.Context
72+
logger *logrus.Entry
8473
}
8574

86-
func vsockAccept(reqCtx context.Context, logger *logrus.Entry, port uint32) (net.Conn, error) {
75+
// VSockListener returns a net.Listener implementation for guest-side Firecracker vsock
76+
// connections.
77+
func VSockListener(ctx context.Context, logger *logrus.Entry, port uint32) (net.Listener, error) {
8778
listener, err := vsock.Listen(port)
8879
if err != nil {
8980
return nil, err
9081
}
9182

92-
defer listener.Close()
83+
return vsockListener{
84+
listener: listener,
85+
port: port,
86+
ctx: ctx,
87+
logger: logger,
88+
}, nil
89+
}
9390

94-
// Retries occur every 10ms up to vsockConnectTimeout
95-
const retryInterval = 10 * time.Millisecond
96-
ctx, cancel := context.WithTimeout(reqCtx, vsockConnectTimeout)
91+
func (l vsockListener) Accept() (net.Conn, error) {
92+
ctx, cancel := context.WithTimeout(l.ctx, vsockRetryTimeout)
9793
defer cancel()
9894

9995
var attemptCount int
100-
for range time.NewTicker(retryInterval).C {
96+
tickerCh := time.NewTicker(vsockRetryInterval).C
97+
for {
10198
attemptCount++
102-
logger = logger.WithField("attempt", attemptCount)
99+
logger := l.logger.WithField("attempt", attemptCount)
103100

104101
select {
105102
case <-ctx.Done():
106103
return nil, ctx.Err()
107-
default:
108-
// accept is non-blocking so try to accept until we get a connection
109-
conn, err := listener.Accept()
110-
if err == nil {
111-
return conn, nil
112-
}
113-
104+
case <-tickerCh:
105+
conn, err := tryAccept(logger, l.listener, l.port)
114106
if isTemporaryNetErr(err) {
115-
logger.WithError(err).Debug("temporary stdio vsock accept failure")
107+
err = errors.Wrap(err, "temporary vsock accept failure")
108+
logger.WithError(err).Debug()
116109
continue
110+
} else if err != nil {
111+
err = errors.Wrap(err, "non-temporary vsock accept failure")
112+
logger.WithError(err).Error()
113+
return nil, err
117114
}
118115

119-
logger.WithError(err).Error("non-temporary stdio vsock accept failure")
120-
return nil, err
116+
return conn, nil
121117
}
122118
}
119+
}
120+
121+
func (l vsockListener) Close() error {
122+
return l.listener.Close()
123+
}
124+
125+
func (l vsockListener) Addr() net.Addr {
126+
return l.listener.Addr()
127+
}
123128

124-
panic("unreachable code") // appeases the compiler, which doesn't know the for loop is infinite
129+
// VSockDialConnector returns an IOConnector for establishing vsock connections
130+
// that are dialed from the host to a guest listener.
131+
func VSockDialConnector(udsPath string, port uint32) IOConnector {
132+
return func(procCtx context.Context, logger *logrus.Entry) <-chan IOConnectorResult {
133+
returnCh := make(chan IOConnectorResult)
134+
135+
go func() {
136+
defer close(returnCh)
137+
138+
conn, err := VSockDial(procCtx, logger, udsPath, port)
139+
returnCh <- IOConnectorResult{
140+
ReadWriteCloser: conn,
141+
Err: err,
142+
}
143+
}()
144+
145+
return returnCh
146+
}
125147
}
126148

127-
// VSockAcceptConnector provides an IOConnector that establishes the connection by listening on the provided
128-
// vsock port and accepting the first connection that comes in.
149+
// VSockAcceptConnector provides an IOConnector that establishes the connection by listening
150+
// on the provided guest-side vsock port and accepting the first connection that comes in.
129151
func VSockAcceptConnector(port uint32) IOConnector {
130152
return func(procCtx context.Context, logger *logrus.Entry) <-chan IOConnectorResult {
131153
returnCh := make(chan IOConnectorResult)
132154

155+
listener, err := VSockListener(procCtx, logger, port)
156+
if err != nil {
157+
returnCh <- IOConnectorResult{
158+
Err: err,
159+
}
160+
close(returnCh)
161+
return returnCh
162+
}
163+
133164
go func() {
134165
defer close(returnCh)
166+
defer listener.Close()
135167

136-
conn, err := vsockAccept(procCtx, logger, port)
168+
conn, err := listener.Accept()
137169
returnCh <- IOConnectorResult{
138170
ReadWriteCloser: conn,
139171
Err: err,
@@ -144,23 +176,153 @@ func VSockAcceptConnector(port uint32) IOConnector {
144176
}
145177
}
146178

147-
func isTemporaryNetErr(err error) bool {
148-
terr, ok := err.(interface {
149-
Temporary() bool
150-
})
179+
func vsockConnectMsg(port uint32) string {
180+
// The message a host-side connection must write after connecting to a firecracker
181+
// vsock unix socket in order to establish a connection with a guest-side listener
182+
// at the provided port number. This is specified in Firecracker documentation:
183+
// https://github.com/firecracker-microvm/firecracker/blob/master/docs/vsock.md#host-initiated-connections
184+
return fmt.Sprintf("CONNECT %d\n", port)
185+
}
151186

152-
return err != nil && ok && terr.Temporary()
187+
func vsockAckMsg(port uint32) string {
188+
// The message a guest-side connection will write after accepting a connection from
189+
// a host dial. This is not part of the official Firecracker vsock spec, but is
190+
// recommended in order to allow the host to verify connections were established
191+
// successfully: https://github.com/firecracker-microvm/firecracker/issues/1272#issuecomment-533004066
192+
return fmt.Sprintf("IMALIVE %d\n", port)
193+
}
194+
195+
// tryConnect attempts to dial a guest vsock listener at the provided host-side
196+
// unix socket and provided guest-listener port.
197+
func tryConnect(logger *logrus.Entry, udsPath string, port uint32) (net.Conn, error) {
198+
conn, err := net.DialTimeout("unix", udsPath, unixDialTimeout)
199+
if err != nil {
200+
return nil, err
201+
}
202+
203+
defer func() {
204+
if err != nil {
205+
closeErr := conn.Close()
206+
if closeErr != nil {
207+
logger.WithError(closeErr).Error(
208+
"failed to close vsock socket after previous error")
209+
}
210+
}
211+
}()
212+
213+
err = tryConnWrite(conn, vsockConnectMsg(port), vsockConnectMsgTimeout)
214+
if err != nil {
215+
return nil, vsockConnectMsgError{cause: err}
216+
}
217+
218+
err = tryConnRead(conn, vsockAckMsg(port), vsockAckMsgTimeout)
219+
if err != nil {
220+
return nil, vsockAckError{cause: err}
221+
}
222+
return conn, nil
223+
}
224+
225+
// tryAccept attempts to accept a single host-side connection from the provided
226+
// guest-side listener at the provided port.
227+
func tryAccept(logger *logrus.Entry, listener net.Listener, port uint32) (net.Conn, error) {
228+
conn, err := listener.Accept()
229+
if err != nil {
230+
return nil, err
231+
}
232+
233+
defer func() {
234+
if err != nil {
235+
closeErr := conn.Close()
236+
if closeErr != nil {
237+
logger.WithError(closeErr).Error(
238+
"failed to close vsock after previous error")
239+
}
240+
}
241+
}()
242+
243+
err = tryConnWrite(conn, vsockAckMsg(port), vsockAckMsgTimeout)
244+
if err != nil {
245+
return nil, vsockAckError{cause: err}
246+
}
247+
248+
return conn, nil
249+
}
250+
251+
// tryConnRead will try to do a read from the provided conn, returning an error if
252+
// the bytes read does not match what was provided or if the read does not complete
253+
// within the provided timeout. It will reset socket deadlines to none after returning.
254+
// It's only intended to be used for connect/ack messages, not general purpose reads
255+
// after the vsock connection is established fully.
256+
func tryConnRead(conn net.Conn, expectedRead string, timeout time.Duration) error {
257+
conn.SetDeadline(time.Now().Add(timeout))
258+
defer conn.SetDeadline(time.Time{})
259+
260+
actualRead := make([]byte, len(expectedRead))
261+
_, err := conn.Read(actualRead)
262+
if err != nil {
263+
return err
264+
}
265+
266+
if expectedRead != string(actualRead) {
267+
return errors.Errorf("expected to read %q, but instead read %q",
268+
expectedRead, string(actualRead))
269+
}
270+
271+
return nil
272+
}
273+
274+
// tryConnWrite will try to do a write to the provided conn, returning an error if
275+
// the write fails, is partial or does not complete within the provided timeout. It
276+
// will reset socket deadlines to none after returning. It's only intended to be
277+
// used for connect/ack messages, not general purpose writes after the vsock
278+
// connection is established fully.
279+
func tryConnWrite(conn net.Conn, expectedWrite string, timeout time.Duration) error {
280+
conn.SetDeadline(time.Now().Add(timeout))
281+
defer conn.SetDeadline(time.Time{})
282+
283+
bytesWritten, err := conn.Write([]byte(expectedWrite))
284+
if err != nil {
285+
return err
286+
}
287+
if bytesWritten != len(expectedWrite) {
288+
return errors.Errorf("incomplete write, expected %d bytes but wrote %d",
289+
len(expectedWrite), bytesWritten)
290+
}
291+
292+
return nil
153293
}
154294

155-
// Unfortunately, as "documented" on various online forums, there's no ideal way to
156-
// test for actual Linux error codes returned by the net library or wrappers
157-
// around that library. The common approach is to fall back on string matching,
158-
// which is done for the functions below
295+
type vsockConnectMsgError struct {
296+
cause error
297+
}
159298

160-
func isENXIO(err error) bool {
161-
return strings.HasSuffix(err.Error(), "no such device")
299+
func (e vsockConnectMsgError) Error() string {
300+
return errors.Wrap(e.cause, "vsock connect message failure").Error()
162301
}
163302

164-
func isECONNRESET(err error) bool {
165-
return strings.HasSuffix(err.Error(), "connection reset by peer")
303+
func (e vsockConnectMsgError) Temporary() bool {
304+
return false
305+
}
306+
307+
type vsockAckError struct {
308+
cause error
309+
}
310+
311+
func (e vsockAckError) Error() string {
312+
return errors.Wrap(e.cause, "vsock ack message failure").Error()
313+
}
314+
315+
func (e vsockAckError) Temporary() bool {
316+
return true
317+
}
318+
319+
// isTemporaryNetErr returns whether the provided error is a retriable
320+
// error, according to the interface defined here:
321+
// https://golang.org/pkg/net/#Error
322+
func isTemporaryNetErr(err error) bool {
323+
terr, ok := err.(interface {
324+
Temporary() bool
325+
})
326+
327+
return err != nil && ok && terr.Temporary()
166328
}

0 commit comments

Comments
 (0)