Skip to content

Commit 3361a6e

Browse files
committed
Merge branch 'vsock-virtio' into main
Luigi Leonardi says: ==================== vsock: avoid queuing on intermediate queue if possible This series introduces an optimization for vsock/virtio to reduce latency and increase the throughput: When the guest sends a packet to the host, and the intermediate queue (send_pkt_queue) is empty, if there is enough space, the packet is put directly in the virtqueue. v3->v4 While running experiments on fio with 64B payload, I realized that there was a mistake in my fio configuration, so I re-ran all the experiments and now the latency numbers are indeed lower with the patch applied. I also noticed that I was kicking the host without the lock. - Fixed a configuration mistake on fio and re-ran all experiments. - Fio latency measurement using 64B payload. - virtio_transport_send_skb_fast_path sends kick with the tx_lock acquired - Addressed all minor style changes requested by maintainer. - Rebased on latest net-next - Link to v3: https://lore.kernel.org/r/[email protected] v2->v3 - Performed more experiments using iperf3 using multiple streams - Handling of reply packets removed from virtio_transport_send_skb, as is needed just by the worker. - Removed atomic_inc/atomic_sub when queuing directly to the vq. - Introduced virtio_transport_send_skb_fast_path that handles the steps for sending on the vq. - Fixed a missing mutex_unlock in error path. - Changed authorship of the second commit - Rebased on latest net-next v1->v2 In this v2 I replaced a mutex_lock with a mutex_trylock because it was insidea RCU critical section. I also added a check on tx_run, so if the module is being removed the packet is not queued. I'd like to thank Stefano for reporting the tx_run issue. Applied all Stefano's suggestions: - Minor code style changes - Minor commit text rewrite Performed more experiments: - Check if all the packets go directly to the vq (Matias' suggestion) - Used iperf3 to see if there is any improvement in overall throughput from guest to host - Pinned the vhost process to a pCPU. - Run fio using 512B payload Rebased on latest net-next ==================== Signed-off-by: David S. Miller <[email protected]>
2 parents 5fe164f + 18ee44c commit 3361a6e

File tree

10 files changed

+202
-8
lines changed

10 files changed

+202
-8
lines changed

drivers/vhost/vsock.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
244244
restart_tx = true;
245245
}
246246

247-
consume_skb(skb);
247+
virtio_transport_consume_skb_sent(skb, true);
248248
}
249249
} while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
250250
if (added)
@@ -451,6 +451,8 @@ static struct virtio_transport vhost_transport = {
451451
.notify_buffer_size = virtio_transport_notify_buffer_size,
452452
.notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat,
453453

454+
.unsent_bytes = virtio_transport_unsent_bytes,
455+
454456
.read_skb = virtio_transport_read_skb,
455457
},
456458

include/linux/virtio_vsock.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ struct virtio_vsock_sock {
133133
u32 tx_cnt;
134134
u32 peer_fwd_cnt;
135135
u32 peer_buf_alloc;
136+
size_t bytes_unsent;
136137

137138
/* Protected by rx_lock */
138139
u32 fwd_cnt;
@@ -193,6 +194,11 @@ s64 virtio_transport_stream_has_data(struct vsock_sock *vsk);
193194
s64 virtio_transport_stream_has_space(struct vsock_sock *vsk);
194195
u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk);
195196

197+
ssize_t virtio_transport_unsent_bytes(struct vsock_sock *vsk);
198+
199+
void virtio_transport_consume_skb_sent(struct sk_buff *skb,
200+
bool consume);
201+
196202
int virtio_transport_do_socket_init(struct vsock_sock *vsk,
197203
struct vsock_sock *psk);
198204
int

include/net/af_vsock.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,9 @@ struct vsock_transport {
169169
void (*notify_buffer_size)(struct vsock_sock *, u64 *);
170170
int (*notify_set_rcvlowat)(struct vsock_sock *vsk, int val);
171171

172+
/* SIOCOUTQ ioctl */
173+
ssize_t (*unsent_bytes)(struct vsock_sock *vsk);
174+
172175
/* Shutdown. */
173176
int (*shutdown)(struct vsock_sock *, int);
174177

net/vmw_vsock/af_vsock.c

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@
112112
#include <net/sock.h>
113113
#include <net/af_vsock.h>
114114
#include <uapi/linux/vm_sockets.h>
115+
#include <uapi/asm-generic/ioctls.h>
115116

116117
static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr);
117118
static void vsock_sk_destruct(struct sock *sk);
@@ -1292,6 +1293,57 @@ int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
12921293
}
12931294
EXPORT_SYMBOL_GPL(vsock_dgram_recvmsg);
12941295

1296+
static int vsock_do_ioctl(struct socket *sock, unsigned int cmd,
1297+
int __user *arg)
1298+
{
1299+
struct sock *sk = sock->sk;
1300+
struct vsock_sock *vsk;
1301+
int ret;
1302+
1303+
vsk = vsock_sk(sk);
1304+
1305+
switch (cmd) {
1306+
case SIOCOUTQ: {
1307+
ssize_t n_bytes;
1308+
1309+
if (!vsk->transport || !vsk->transport->unsent_bytes) {
1310+
ret = -EOPNOTSUPP;
1311+
break;
1312+
}
1313+
1314+
if (sock_type_connectible(sk->sk_type) && sk->sk_state == TCP_LISTEN) {
1315+
ret = -EINVAL;
1316+
break;
1317+
}
1318+
1319+
n_bytes = vsk->transport->unsent_bytes(vsk);
1320+
if (n_bytes < 0) {
1321+
ret = n_bytes;
1322+
break;
1323+
}
1324+
1325+
ret = put_user(n_bytes, arg);
1326+
break;
1327+
}
1328+
default:
1329+
ret = -ENOIOCTLCMD;
1330+
}
1331+
1332+
return ret;
1333+
}
1334+
1335+
static int vsock_ioctl(struct socket *sock, unsigned int cmd,
1336+
unsigned long arg)
1337+
{
1338+
int ret;
1339+
1340+
lock_sock(sock->sk);
1341+
ret = vsock_do_ioctl(sock, cmd, (int __user *)arg);
1342+
release_sock(sock->sk);
1343+
1344+
return ret;
1345+
}
1346+
12951347
static const struct proto_ops vsock_dgram_ops = {
12961348
.family = PF_VSOCK,
12971349
.owner = THIS_MODULE,
@@ -1302,7 +1354,7 @@ static const struct proto_ops vsock_dgram_ops = {
13021354
.accept = sock_no_accept,
13031355
.getname = vsock_getname,
13041356
.poll = vsock_poll,
1305-
.ioctl = sock_no_ioctl,
1357+
.ioctl = vsock_ioctl,
13061358
.listen = sock_no_listen,
13071359
.shutdown = vsock_shutdown,
13081360
.sendmsg = vsock_dgram_sendmsg,
@@ -2286,7 +2338,7 @@ static const struct proto_ops vsock_stream_ops = {
22862338
.accept = vsock_accept,
22872339
.getname = vsock_getname,
22882340
.poll = vsock_poll,
2289-
.ioctl = sock_no_ioctl,
2341+
.ioctl = vsock_ioctl,
22902342
.listen = vsock_listen,
22912343
.shutdown = vsock_shutdown,
22922344
.setsockopt = vsock_connectible_setsockopt,
@@ -2308,7 +2360,7 @@ static const struct proto_ops vsock_seqpacket_ops = {
23082360
.accept = vsock_accept,
23092361
.getname = vsock_getname,
23102362
.poll = vsock_poll,
2311-
.ioctl = sock_no_ioctl,
2363+
.ioctl = vsock_ioctl,
23122364
.listen = vsock_listen,
23132365
.shutdown = vsock_shutdown,
23142366
.setsockopt = vsock_connectible_setsockopt,

net/vmw_vsock/virtio_transport.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ static void virtio_transport_tx_work(struct work_struct *work)
311311

312312
virtqueue_disable_cb(vq);
313313
while ((skb = virtqueue_get_buf(vq, &len)) != NULL) {
314-
consume_skb(skb);
314+
virtio_transport_consume_skb_sent(skb, true);
315315
added = true;
316316
}
317317
} while (!virtqueue_enable_cb(vq));
@@ -540,6 +540,8 @@ static struct virtio_transport virtio_transport = {
540540
.notify_buffer_size = virtio_transport_notify_buffer_size,
541541
.notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat,
542542

543+
.unsent_bytes = virtio_transport_unsent_bytes,
544+
543545
.read_skb = virtio_transport_read_skb,
544546
},
545547

net/vmw_vsock/virtio_transport_common.c

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,26 @@ void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *
463463
}
464464
EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt);
465465

466+
void virtio_transport_consume_skb_sent(struct sk_buff *skb, bool consume)
467+
{
468+
struct sock *s = skb->sk;
469+
470+
if (s && skb->len) {
471+
struct vsock_sock *vs = vsock_sk(s);
472+
struct virtio_vsock_sock *vvs;
473+
474+
vvs = vs->trans;
475+
476+
spin_lock_bh(&vvs->tx_lock);
477+
vvs->bytes_unsent -= skb->len;
478+
spin_unlock_bh(&vvs->tx_lock);
479+
}
480+
481+
if (consume)
482+
consume_skb(skb);
483+
}
484+
EXPORT_SYMBOL_GPL(virtio_transport_consume_skb_sent);
485+
466486
u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit)
467487
{
468488
u32 ret;
@@ -475,6 +495,7 @@ u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 credit)
475495
if (ret > credit)
476496
ret = credit;
477497
vvs->tx_cnt += ret;
498+
vvs->bytes_unsent += ret;
478499
spin_unlock_bh(&vvs->tx_lock);
479500

480501
return ret;
@@ -488,6 +509,7 @@ void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit)
488509

489510
spin_lock_bh(&vvs->tx_lock);
490511
vvs->tx_cnt -= credit;
512+
vvs->bytes_unsent -= credit;
491513
spin_unlock_bh(&vvs->tx_lock);
492514
}
493515
EXPORT_SYMBOL_GPL(virtio_transport_put_credit);
@@ -1090,6 +1112,19 @@ void virtio_transport_destruct(struct vsock_sock *vsk)
10901112
}
10911113
EXPORT_SYMBOL_GPL(virtio_transport_destruct);
10921114

1115+
ssize_t virtio_transport_unsent_bytes(struct vsock_sock *vsk)
1116+
{
1117+
struct virtio_vsock_sock *vvs = vsk->trans;
1118+
size_t ret;
1119+
1120+
spin_lock_bh(&vvs->tx_lock);
1121+
ret = vvs->bytes_unsent;
1122+
spin_unlock_bh(&vvs->tx_lock);
1123+
1124+
return ret;
1125+
}
1126+
EXPORT_SYMBOL_GPL(virtio_transport_unsent_bytes);
1127+
10931128
static int virtio_transport_reset(struct vsock_sock *vsk,
10941129
struct sk_buff *skb)
10951130
{

net/vmw_vsock/vsock_loopback.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,8 @@ static struct virtio_transport loopback_transport = {
9898
.notify_buffer_size = virtio_transport_notify_buffer_size,
9999
.notify_set_rcvlowat = virtio_transport_notify_set_rcvlowat,
100100

101+
.unsent_bytes = virtio_transport_unsent_bytes,
102+
101103
.read_skb = virtio_transport_read_skb,
102104
},
103105

@@ -123,6 +125,10 @@ static void vsock_loopback_work(struct work_struct *work)
123125
spin_unlock_bh(&vsock->pkt_queue.lock);
124126

125127
while ((skb = __skb_dequeue(&pkts))) {
128+
/* Decrement the bytes_unsent counter without deallocating skb
129+
* It is freed by the receiver.
130+
*/
131+
virtio_transport_consume_skb_sent(skb, false);
126132
virtio_transport_deliver_tap_pkt(skb);
127133
virtio_transport_recv_pkt(&loopback_transport, skb);
128134
}

tools/testing/vsock/util.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_po
139139
}
140140

141141
/* Connect to <cid, port> and return the file descriptor. */
142-
static int vsock_connect(unsigned int cid, unsigned int port, int type)
142+
int vsock_connect(unsigned int cid, unsigned int port, int type)
143143
{
144144
union {
145145
struct sockaddr sa;
@@ -226,8 +226,8 @@ static int vsock_listen(unsigned int cid, unsigned int port, int type)
226226
/* Listen on <cid, port> and return the first incoming connection. The remote
227227
* address is stored to clientaddrp. clientaddrp may be NULL.
228228
*/
229-
static int vsock_accept(unsigned int cid, unsigned int port,
230-
struct sockaddr_vm *clientaddrp, int type)
229+
int vsock_accept(unsigned int cid, unsigned int port,
230+
struct sockaddr_vm *clientaddrp, int type)
231231
{
232232
union {
233233
struct sockaddr sa;

tools/testing/vsock/util.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ struct test_case {
3939
void init_signals(void);
4040
unsigned int parse_cid(const char *str);
4141
unsigned int parse_port(const char *str);
42+
int vsock_connect(unsigned int cid, unsigned int port, int type);
43+
int vsock_accept(unsigned int cid, unsigned int port,
44+
struct sockaddr_vm *clientaddrp, int type);
4245
int vsock_stream_connect(unsigned int cid, unsigned int port);
4346
int vsock_bind_connect(unsigned int cid, unsigned int port,
4447
unsigned int bind_port, int type);

tools/testing/vsock/vsock_test.c

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
#include <sys/mman.h>
2121
#include <poll.h>
2222
#include <signal.h>
23+
#include <sys/ioctl.h>
24+
#include <linux/sockios.h>
2325

2426
#include "vsock_test_zerocopy.h"
2527
#include "timeout.h"
@@ -1238,6 +1240,79 @@ static void test_double_bind_connect_client(const struct test_opts *opts)
12381240
}
12391241
}
12401242

1243+
#define MSG_BUF_IOCTL_LEN 64
1244+
static void test_unsent_bytes_server(const struct test_opts *opts, int type)
1245+
{
1246+
unsigned char buf[MSG_BUF_IOCTL_LEN];
1247+
int client_fd;
1248+
1249+
client_fd = vsock_accept(VMADDR_CID_ANY, opts->peer_port, NULL, type);
1250+
if (client_fd < 0) {
1251+
perror("accept");
1252+
exit(EXIT_FAILURE);
1253+
}
1254+
1255+
recv_buf(client_fd, buf, sizeof(buf), 0, sizeof(buf));
1256+
control_writeln("RECEIVED");
1257+
1258+
close(client_fd);
1259+
}
1260+
1261+
static void test_unsent_bytes_client(const struct test_opts *opts, int type)
1262+
{
1263+
unsigned char buf[MSG_BUF_IOCTL_LEN];
1264+
int ret, fd, sock_bytes_unsent;
1265+
1266+
fd = vsock_connect(opts->peer_cid, opts->peer_port, type);
1267+
if (fd < 0) {
1268+
perror("connect");
1269+
exit(EXIT_FAILURE);
1270+
}
1271+
1272+
for (int i = 0; i < sizeof(buf); i++)
1273+
buf[i] = rand() & 0xFF;
1274+
1275+
send_buf(fd, buf, sizeof(buf), 0, sizeof(buf));
1276+
control_expectln("RECEIVED");
1277+
1278+
ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent);
1279+
if (ret < 0) {
1280+
if (errno == EOPNOTSUPP) {
1281+
fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n");
1282+
} else {
1283+
perror("ioctl");
1284+
exit(EXIT_FAILURE);
1285+
}
1286+
} else if (ret == 0 && sock_bytes_unsent != 0) {
1287+
fprintf(stderr,
1288+
"Unexpected 'SIOCOUTQ' value, expected 0, got %i\n",
1289+
sock_bytes_unsent);
1290+
exit(EXIT_FAILURE);
1291+
}
1292+
1293+
close(fd);
1294+
}
1295+
1296+
static void test_stream_unsent_bytes_client(const struct test_opts *opts)
1297+
{
1298+
test_unsent_bytes_client(opts, SOCK_STREAM);
1299+
}
1300+
1301+
static void test_stream_unsent_bytes_server(const struct test_opts *opts)
1302+
{
1303+
test_unsent_bytes_server(opts, SOCK_STREAM);
1304+
}
1305+
1306+
static void test_seqpacket_unsent_bytes_client(const struct test_opts *opts)
1307+
{
1308+
test_unsent_bytes_client(opts, SOCK_SEQPACKET);
1309+
}
1310+
1311+
static void test_seqpacket_unsent_bytes_server(const struct test_opts *opts)
1312+
{
1313+
test_unsent_bytes_server(opts, SOCK_SEQPACKET);
1314+
}
1315+
12411316
#define RCVLOWAT_CREDIT_UPD_BUF_SIZE (1024 * 128)
12421317
/* This define is the same as in 'include/linux/virtio_vsock.h':
12431318
* it is used to decide when to send credit update message during
@@ -1523,6 +1598,16 @@ static struct test_case test_cases[] = {
15231598
.run_client = test_stream_rcvlowat_def_cred_upd_client,
15241599
.run_server = test_stream_cred_upd_on_low_rx_bytes,
15251600
},
1601+
{
1602+
.name = "SOCK_STREAM ioctl(SIOCOUTQ) 0 unsent bytes",
1603+
.run_client = test_stream_unsent_bytes_client,
1604+
.run_server = test_stream_unsent_bytes_server,
1605+
},
1606+
{
1607+
.name = "SOCK_SEQPACKET ioctl(SIOCOUTQ) 0 unsent bytes",
1608+
.run_client = test_seqpacket_unsent_bytes_client,
1609+
.run_server = test_seqpacket_unsent_bytes_server,
1610+
},
15261611
{},
15271612
};
15281613

0 commit comments

Comments
 (0)