3
3
package vz
4
4
5
5
import (
6
- "context"
7
6
"encoding/binary"
7
+ "errors"
8
8
"io"
9
9
"net"
10
10
"os"
11
+ "sync"
12
+ "syscall"
11
13
"time"
12
14
13
15
"github.com/balajiv113/fd"
14
16
15
17
"github.com/sirupsen/logrus"
16
- "inet.af/tcpproxy" // replaced to github.com/inetaf/tcpproxy in go.mod
17
18
)
18
19
19
20
func PassFDToUnix (unixSock string ) (* os.File , error ) {
@@ -40,7 +41,7 @@ func DialQemu(unixSock string) (*os.File, error) {
40
41
if err != nil {
41
42
return nil , err
42
43
}
43
- qemuConn := & QEMUPacketConn { unixConn : unixConn }
44
+ qemuConn := & qemuPacketConn { Conn : unixConn }
44
45
45
46
server , client , err := createSockPair ()
46
47
if err != nil {
@@ -50,77 +51,117 @@ func DialQemu(unixSock string) (*os.File, error) {
50
51
if err != nil {
51
52
return nil , err
52
53
}
54
+ vzConn := & packetConn {Conn : dgramConn }
53
55
54
- remote := tcpproxy.DialProxy {
55
- DialContext : func (context.Context , string , string ) (net.Conn , error ) {
56
- return dgramConn , nil
57
- },
58
- }
59
- go remote .HandleConn (qemuConn )
56
+ go forwardPackets (qemuConn , vzConn )
60
57
61
58
return client , nil
62
59
}
63
60
64
- // QEMUPacketConn converts raw network packet to a QEMU supported network packet.
65
- type QEMUPacketConn struct {
66
- unixConn net.Conn
61
+ func forwardPackets (qemuConn * qemuPacketConn , vzConn * packetConn ) {
62
+ defer qemuConn .Close ()
63
+ defer vzConn .Close ()
64
+
65
+ var wg sync.WaitGroup
66
+ wg .Add (2 )
67
+
68
+ go func () {
69
+ defer wg .Done ()
70
+ if _ , err := io .Copy (qemuConn , vzConn ); err != nil {
71
+ logrus .Errorf ("Failed to forward packets from VZ to VMNET: %s" , err )
72
+ }
73
+ }()
74
+
75
+ go func () {
76
+ defer wg .Done ()
77
+ if _ , err := io .Copy (vzConn , qemuConn ); err != nil {
78
+ logrus .Errorf ("Failed to forward packets from VMNET to VZ: %s" , err )
79
+ }
80
+ }()
81
+
82
+ wg .Wait ()
67
83
}
68
84
69
- var _ net.Conn = (* QEMUPacketConn )(nil )
85
// qemuPacketConn converts raw network packets to and from QEMU supported
// network packets: every packet on the wire is preceded by a 4-byte
// big-endian header holding the length of the raw packet that follows.
type qemuPacketConn struct {
	net.Conn
}

// Read reads one QEMU packet and stores the contained raw packet in b.
// Returns (len, nil) if a packet was read, and (0, err) on error. A packet
// with an empty payload yields (0, nil). If b is too small for the announced
// packet, io.ErrShortBuffer is returned and the payload is left unread.
// Errors mean the protocol is broken and the socket must be closed.
func (c *qemuPacketConn) Read(b []byte) (n int, err error) {
	var header [4]byte
	if _, err = io.ReadFull(c.Conn, header[:]); err != nil {
		// Likely connection closed by peer.
		return 0, err
	}

	size := binary.BigEndian.Uint32(header[:])
	if uint64(size) > uint64(len(b)) {
		// Reporting more bytes than len(b) would violate the io.Reader
		// contract (io.Copy slices its buffer by the returned count).
		return 0, io.ErrShortBuffer
	}

	// A single Read on the connection may return fewer bytes than requested,
	// so keep reading until the whole payload has arrived; otherwise the
	// next header would be parsed from the middle of this packet.
	if _, err = io.ReadFull(c.Conn, b[:size]); err != nil {
		// Likely connection closed by peer.
		return 0, err
	}
	return int(size), nil
}

// Write writes a QEMU packet containing the raw packet b. Returns (len(b),
// nil) if the packet was written, and (0, err) if it was not fully written.
// Errors mean the protocol is broken and the socket must be closed.
func (c *qemuPacketConn) Write(b []byte) (int, error) {
	size := len(b)

	var header [4]byte
	binary.BigEndian.PutUint32(header[:], uint32(size))
	if _, err := c.Conn.Write(header[:]); err != nil {
		return 0, err
	}

	start := 0
	for start < size {
		nw, err := c.Conn.Write(b[start:])
		if err != nil {
			return 0, err
		}
		if nw == 0 {
			// No progress and no error: bail out instead of spinning.
			return 0, io.ErrShortWrite
		}
		start += nw
	}
	return size, nil
}
103
129
104
- func (v * QEMUPacketConn ) Close () error {
105
- return v .unixConn .Close ()
106
- }
107
-
108
- func (v * QEMUPacketConn ) LocalAddr () net.Addr {
109
- return v .unixConn .LocalAddr ()
110
- }
111
-
112
- func (v * QEMUPacketConn ) RemoteAddr () net.Addr {
113
- return v .unixConn .RemoteAddr ()
114
- }
115
-
116
- func (v * QEMUPacketConn ) SetDeadline (t time.Time ) error {
117
- return v .unixConn .SetDeadline (t )
118
- }
130
+ // Testing show that retries are very rare (e.g 24 of 62,499,008 packets) and
131
+ // requires 1 or 2 retries to complete the write. A 100 microseconds sleep loop
132
+ // consumes about 4% CPU on M1 Pro.
133
+ const writeRetryDelay = 100 * time .Microsecond
119
134
120
- func (v * QEMUPacketConn ) SetReadDeadline (t time.Time ) error {
121
- return v .unixConn .SetReadDeadline (t )
135
+ // packetConn handles ENOBUFS errors when writing to unixgram socket.
136
+ type packetConn struct {
137
+ net.Conn
122
138
}
123
139
124
- func (v * QEMUPacketConn ) SetWriteDeadline (t time.Time ) error {
125
- return v .unixConn .SetWriteDeadline (t )
140
+ // Write writes a packet retrying on ENOBUFS errors.
141
+ func (c * packetConn ) Write (b []byte ) (int , error ) {
142
+ var retries uint64
143
+ for {
144
+ n , err := c .Conn .Write (b )
145
+ if n == 0 && err != nil && errors .Is (err , syscall .ENOBUFS ) {
146
+ // This is an expected condition on BSD based system. The kernel
147
+ // does not support blocking until buffer space is available.
148
+ // The only way to recover is to retry the call until it
149
+ // succeeds, or drop the packet.
150
+ // Handled in a similar way in gvisor-tap-vsock:
151
+ // https://github.com/containers/gvisor-tap-vsock/issues/367
152
+ time .Sleep (writeRetryDelay )
153
+ retries ++
154
+ continue
155
+ }
156
+ if err != nil {
157
+ return 0 , err
158
+ }
159
+ if n < len (b ) {
160
+ return n , errors .New ("incomplete write to unixgram socket" )
161
+ }
162
+ if retries > 0 {
163
+ logrus .Debugf ("Write completed after %d retries" , retries )
164
+ }
165
+ return n , nil
166
+ }
126
167
}
0 commit comments