@@ -15,125 +15,157 @@ package vm
15
15
16
16
import (
17
17
"context"
18
+ "fmt"
18
19
"net"
19
- "strings"
20
20
"time"
21
21
22
22
"github.com/mdlayher/vsock"
23
+ "github.com/pkg/errors"
23
24
"github.com/sirupsen/logrus"
24
25
)
25
26
26
27
const (
27
- vsockConnectTimeout = 20 * time .Second
28
+ vsockRetryTimeout = 20 * time .Second
29
+ vsockRetryInterval = 100 * time .Millisecond
30
+ unixDialTimeout = 100 * time .Millisecond
31
+ vsockConnectMsgTimeout = 100 * time .Millisecond
32
+ vsockAckMsgTimeout = 1 * time .Second
28
33
)
29
34
30
- // VSockDial attempts to connect to a vsock listener at the provided cid and port with a hardcoded number
31
- // of retries.
32
- func VSockDial (reqCtx context.Context , logger * logrus.Entry , contextID , port uint32 ) (net.Conn , error ) {
33
- // Retries occur every 100ms up to vsockConnectTimeout
34
- const retryInterval = 100 * time .Millisecond
35
- ctx , cancel := context .WithTimeout (reqCtx , vsockConnectTimeout )
35
+ // VSockDial attempts to connect to the Firecracker host-side vsock at the provided unix
36
+ // path and port. It will retry connect attempts if a temporary error is encountered (up
37
+ // to a fixed timeout) or the provided request is canceled.
38
+ func VSockDial (reqCtx context.Context , logger * logrus.Entry , udsPath string , port uint32 ) (net.Conn , error ) {
39
+ ctx , cancel := context .WithTimeout (reqCtx , vsockRetryTimeout )
36
40
defer cancel ()
37
41
42
+ tickerCh := time .NewTicker (vsockRetryInterval ).C
38
43
var attemptCount int
39
- for range time . NewTicker ( retryInterval ). C {
44
+ for {
40
45
attemptCount ++
41
- logger = logger .WithField ("attempt" , attemptCount )
46
+ logger : = logger .WithField ("attempt" , attemptCount )
42
47
43
48
select {
44
49
case <- ctx .Done ():
45
50
return nil , ctx .Err ()
46
- default :
47
- conn , err := vsock .Dial (contextID , port )
48
- if err == nil {
49
- logger .WithField ("connection" , conn ).Debug ("vsock dial succeeded" )
50
- return conn , nil
51
- }
52
-
53
- // ENXIO and ECONNRESET can be returned while the VM+agent are still in the midst of booting
54
- if isTemporaryNetErr (err ) || isENXIO (err ) || isECONNRESET (err ) {
55
- logger .WithError (err ).Debug ("temporary vsock dial failure" )
51
+ case <- tickerCh :
52
+ conn , err := tryConnect (logger , udsPath , port )
53
+ if isTemporaryNetErr (err ) {
54
+ err = errors .Wrap (err , "temporary vsock dial failure" )
55
+ logger .WithError (err ).Debug ()
56
56
continue
57
+ } else if err != nil {
58
+ err = errors .Wrap (err , "non-temporary vsock dial failure" )
59
+ logger .WithError (err ).Error ()
60
+ return nil , err
57
61
}
58
62
59
- logger .WithError (err ).Error ("non-temporary vsock dial failure" )
60
- return nil , err
63
+ return conn , nil
61
64
}
62
65
}
63
-
64
- panic ("unreachable code" ) // appeases the compiler, which doesn't know the for loop is infinite
65
66
}
66
67
67
- // VSockDialConnector provides an IOConnector interface to the VSockDial function.
68
- func VSockDialConnector (contextID , port uint32 ) IOConnector {
69
- return func (procCtx context.Context , logger * logrus.Entry ) <- chan IOConnectorResult {
70
- returnCh := make (chan IOConnectorResult )
71
-
72
- go func () {
73
- defer close (returnCh )
74
-
75
- conn , err := VSockDial (procCtx , logger , contextID , port )
76
- returnCh <- IOConnectorResult {
77
- ReadWriteCloser : conn ,
78
- Err : err ,
79
- }
80
- }()
81
-
82
- return returnCh
83
- }
68
+ type vsockListener struct {
69
+ listener net.Listener
70
+ port uint32
71
+ ctx context.Context
72
+ logger * logrus.Entry
84
73
}
85
74
86
- func vsockAccept (reqCtx context.Context , logger * logrus.Entry , port uint32 ) (net.Conn , error ) {
75
+ // VSockListener returns a net.Listener implementation for guest-side Firecracker vsock
76
+ // connections.
77
+ func VSockListener (ctx context.Context , logger * logrus.Entry , port uint32 ) (net.Listener , error ) {
87
78
listener , err := vsock .Listen (port )
88
79
if err != nil {
89
80
return nil , err
90
81
}
91
82
92
- defer listener .Close ()
83
+ return vsockListener {
84
+ listener : listener ,
85
+ port : port ,
86
+ ctx : ctx ,
87
+ logger : logger ,
88
+ }, nil
89
+ }
93
90
94
- // Retries occur every 10ms up to vsockConnectTimeout
95
- const retryInterval = 10 * time .Millisecond
96
- ctx , cancel := context .WithTimeout (reqCtx , vsockConnectTimeout )
91
+ func (l vsockListener ) Accept () (net.Conn , error ) {
92
+ ctx , cancel := context .WithTimeout (l .ctx , vsockRetryTimeout )
97
93
defer cancel ()
98
94
99
95
var attemptCount int
100
- for range time .NewTicker (retryInterval ).C {
96
+ tickerCh := time .NewTicker (vsockRetryInterval ).C
97
+ for {
101
98
attemptCount ++
102
- logger = logger .WithField ("attempt" , attemptCount )
99
+ logger := l . logger .WithField ("attempt" , attemptCount )
103
100
104
101
select {
105
102
case <- ctx .Done ():
106
103
return nil , ctx .Err ()
107
- default :
108
- // accept is non-blocking so try to accept until we get a connection
109
- conn , err := listener .Accept ()
110
- if err == nil {
111
- return conn , nil
112
- }
113
-
104
+ case <- tickerCh :
105
+ conn , err := tryAccept (logger , l .listener , l .port )
114
106
if isTemporaryNetErr (err ) {
115
- logger .WithError (err ).Debug ("temporary stdio vsock accept failure" )
107
+ err = errors .Wrap (err , "temporary vsock accept failure" )
108
+ logger .WithError (err ).Debug ()
116
109
continue
110
+ } else if err != nil {
111
+ err = errors .Wrap (err , "non-temporary vsock accept failure" )
112
+ logger .WithError (err ).Error ()
113
+ return nil , err
117
114
}
118
115
119
- logger .WithError (err ).Error ("non-temporary stdio vsock accept failure" )
120
- return nil , err
116
+ return conn , nil
121
117
}
122
118
}
119
+ }
120
+
121
+ func (l vsockListener ) Close () error {
122
+ return l .listener .Close ()
123
+ }
124
+
125
+ func (l vsockListener ) Addr () net.Addr {
126
+ return l .listener .Addr ()
127
+ }
123
128
124
- panic ("unreachable code" ) // appeases the compiler, which doesn't know the for loop is infinite
129
+ // VSockDialConnector returns an IOConnector for establishing vsock connections
130
+ // that are dialed from the host to a guest listener.
131
+ func VSockDialConnector (udsPath string , port uint32 ) IOConnector {
132
+ return func (procCtx context.Context , logger * logrus.Entry ) <- chan IOConnectorResult {
133
+ returnCh := make (chan IOConnectorResult )
134
+
135
+ go func () {
136
+ defer close (returnCh )
137
+
138
+ conn , err := VSockDial (procCtx , logger , udsPath , port )
139
+ returnCh <- IOConnectorResult {
140
+ ReadWriteCloser : conn ,
141
+ Err : err ,
142
+ }
143
+ }()
144
+
145
+ return returnCh
146
+ }
125
147
}
126
148
127
- // VSockAcceptConnector provides an IOConnector that establishes the connection by listening on the provided
128
- // vsock port and accepting the first connection that comes in.
149
+ // VSockAcceptConnector provides an IOConnector that establishes the connection by listening
150
+ // on the provided guest-side vsock port and accepting the first connection that comes in.
129
151
func VSockAcceptConnector (port uint32 ) IOConnector {
130
152
return func (procCtx context.Context , logger * logrus.Entry ) <- chan IOConnectorResult {
131
153
returnCh := make (chan IOConnectorResult )
132
154
155
+ listener , err := VSockListener (procCtx , logger , port )
156
+ if err != nil {
157
+ returnCh <- IOConnectorResult {
158
+ Err : err ,
159
+ }
160
+ close (returnCh )
161
+ return returnCh
162
+ }
163
+
133
164
go func () {
134
165
defer close (returnCh )
166
+ defer listener .Close ()
135
167
136
- conn , err := vsockAccept ( procCtx , logger , port )
168
+ conn , err := listener . Accept ( )
137
169
returnCh <- IOConnectorResult {
138
170
ReadWriteCloser : conn ,
139
171
Err : err ,
@@ -144,23 +176,153 @@ func VSockAcceptConnector(port uint32) IOConnector {
144
176
}
145
177
}
146
178
147
- func isTemporaryNetErr (err error ) bool {
148
- terr , ok := err .(interface {
149
- Temporary () bool
150
- })
179
+ func vsockConnectMsg (port uint32 ) string {
180
+ // The message a host-side connection must write after connecting to a firecracker
181
+ // vsock unix socket in order to establish a connection with a guest-side listener
182
+ // at the provided port number. This is specified in Firecracker documentation:
183
+ // https://github.com/firecracker-microvm/firecracker/blob/master/docs/vsock.md#host-initiated-connections
184
+ return fmt .Sprintf ("CONNECT %d\n " , port )
185
+ }
151
186
152
- return err != nil && ok && terr .Temporary ()
187
+ func vsockAckMsg (port uint32 ) string {
188
+ // The message a guest-side connection will write after accepting a connection from
189
+ // a host dial. This is not part of the official Firecracker vsock spec, but is
190
+ // recommended in order to allow the host to verify connections were established
191
+ // successfully: https://github.com/firecracker-microvm/firecracker/issues/1272#issuecomment-533004066
192
+ return fmt .Sprintf ("IMALIVE %d\n " , port )
193
+ }
194
+
195
+ // tryConnect attempts to dial a guest vsock listener at the provided host-side
196
+ // unix socket and provided guest-listener port.
197
+ func tryConnect (logger * logrus.Entry , udsPath string , port uint32 ) (net.Conn , error ) {
198
+ conn , err := net .DialTimeout ("unix" , udsPath , unixDialTimeout )
199
+ if err != nil {
200
+ return nil , err
201
+ }
202
+
203
+ defer func () {
204
+ if err != nil {
205
+ closeErr := conn .Close ()
206
+ if closeErr != nil {
207
+ logger .WithError (closeErr ).Error (
208
+ "failed to close vsock socket after previous error" )
209
+ }
210
+ }
211
+ }()
212
+
213
+ err = tryConnWrite (conn , vsockConnectMsg (port ), vsockConnectMsgTimeout )
214
+ if err != nil {
215
+ return nil , vsockConnectMsgError {cause : err }
216
+ }
217
+
218
+ err = tryConnRead (conn , vsockAckMsg (port ), vsockAckMsgTimeout )
219
+ if err != nil {
220
+ return nil , vsockAckError {cause : err }
221
+ }
222
+ return conn , nil
223
+ }
224
+
225
+ // tryAccept attempts to accept a single host-side connection from the provided
226
+ // guest-side listener at the provided port.
227
+ func tryAccept (logger * logrus.Entry , listener net.Listener , port uint32 ) (net.Conn , error ) {
228
+ conn , err := listener .Accept ()
229
+ if err != nil {
230
+ return nil , err
231
+ }
232
+
233
+ defer func () {
234
+ if err != nil {
235
+ closeErr := conn .Close ()
236
+ if closeErr != nil {
237
+ logger .WithError (closeErr ).Error (
238
+ "failed to close vsock after previous error" )
239
+ }
240
+ }
241
+ }()
242
+
243
+ err = tryConnWrite (conn , vsockAckMsg (port ), vsockAckMsgTimeout )
244
+ if err != nil {
245
+ return nil , vsockAckError {cause : err }
246
+ }
247
+
248
+ return conn , nil
249
+ }
250
+
251
+ // tryConnRead will try to do a read from the provided conn, returning an error if
252
+ // the bytes read does not match what was provided or if the read does not complete
253
+ // within the provided timeout. It will reset socket deadlines to none after returning.
254
+ // It's only intended to be used for connect/ack messages, not general purpose reads
255
+ // after the vsock connection is established fully.
256
+ func tryConnRead (conn net.Conn , expectedRead string , timeout time.Duration ) error {
257
+ conn .SetDeadline (time .Now ().Add (timeout ))
258
+ defer conn .SetDeadline (time.Time {})
259
+
260
+ actualRead := make ([]byte , len (expectedRead ))
261
+ _ , err := conn .Read (actualRead )
262
+ if err != nil {
263
+ return err
264
+ }
265
+
266
+ if expectedRead != string (actualRead ) {
267
+ return errors .Errorf ("expected to read %q, but instead read %q" ,
268
+ expectedRead , string (actualRead ))
269
+ }
270
+
271
+ return nil
272
+ }
273
+
274
+ // tryConnWrite will try to do a write to the provided conn, returning an error if
275
+ // the write fails, is partial or does not complete within the provided timeout. It
276
+ // will reset socket deadlines to none after returning. It's only intended to be
277
+ // used for connect/ack messages, not general purpose writes after the vsock
278
+ // connection is established fully.
279
+ func tryConnWrite (conn net.Conn , expectedWrite string , timeout time.Duration ) error {
280
+ conn .SetDeadline (time .Now ().Add (timeout ))
281
+ defer conn .SetDeadline (time.Time {})
282
+
283
+ bytesWritten , err := conn .Write ([]byte (expectedWrite ))
284
+ if err != nil {
285
+ return err
286
+ }
287
+ if bytesWritten != len (expectedWrite ) {
288
+ return errors .Errorf ("incomplete write, expected %d bytes but wrote %d" ,
289
+ len (expectedWrite ), bytesWritten )
290
+ }
291
+
292
+ return nil
153
293
}
154
294
155
- // Unfortunately, as "documented" on various online forums, there's no ideal way to
156
- // test for actual Linux error codes returned by the net library or wrappers
157
- // around that library. The common approach is to fall back on string matching,
158
- // which is done for the functions below
295
+ type vsockConnectMsgError struct {
296
+ cause error
297
+ }
159
298
160
- func isENXIO ( err error ) bool {
161
- return strings . HasSuffix ( err . Error () , "no such device" )
299
+ func ( e vsockConnectMsgError ) Error () string {
300
+ return errors . Wrap ( e . cause , "vsock connect message failure" ). Error ( )
162
301
}
163
302
164
- func isECONNRESET (err error ) bool {
165
- return strings .HasSuffix (err .Error (), "connection reset by peer" )
303
+ func (e vsockConnectMsgError ) Temporary () bool {
304
+ return false
305
+ }
306
+
307
+ type vsockAckError struct {
308
+ cause error
309
+ }
310
+
311
+ func (e vsockAckError ) Error () string {
312
+ return errors .Wrap (e .cause , "vsock ack message failure" ).Error ()
313
+ }
314
+
315
+ func (e vsockAckError ) Temporary () bool {
316
+ return true
317
+ }
318
+
319
+ // isTemporaryNetErr returns whether the provided error is a retriable
320
+ // error, according to the interface defined here:
321
+ // https://golang.org/pkg/net/#Error
322
+ func isTemporaryNetErr (err error ) bool {
323
+ terr , ok := err .(interface {
324
+ Temporary () bool
325
+ })
326
+
327
+ return err != nil && ok && terr .Temporary ()
166
328
}
0 commit comments