@@ -74,7 +74,8 @@ static const u64 vhost_net_features[VIRTIO_FEATURES_DWORDS] = {
 	(1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
 	(1ULL << VIRTIO_NET_F_MRG_RXBUF) |
 	(1ULL << VIRTIO_F_ACCESS_PLATFORM) |
-	(1ULL << VIRTIO_F_RING_RESET),
+	(1ULL << VIRTIO_F_RING_RESET) |
+	(1ULL << VIRTIO_F_IN_ORDER),
 	VIRTIO_BIT(VIRTIO_NET_F_GUEST_UDP_TUNNEL_GSO) |
 	VIRTIO_BIT(VIRTIO_NET_F_HOST_UDP_TUNNEL_GSO),
 };
@@ -376,7 +377,8 @@ static void vhost_zerocopy_signal_used(struct vhost_net *net,
 	while (j) {
 		add = min(UIO_MAXIOV - nvq->done_idx, j);
 		vhost_add_used_and_signal_n(vq->dev, vq,
-					    &vq->heads[nvq->done_idx], add);
+					    &vq->heads[nvq->done_idx],
+					    NULL, add);
 		nvq->done_idx = (nvq->done_idx + add) % UIO_MAXIOV;
 		j -= add;
 	}
@@ -451,15 +453,17 @@ static int vhost_net_enable_vq(struct vhost_net *n,
 	return vhost_poll_start(poll, sock->file);
 }

-static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq)
+static void vhost_net_signal_used(struct vhost_net_virtqueue *nvq,
+				  unsigned int count)
 {
 	struct vhost_virtqueue *vq = &nvq->vq;
 	struct vhost_dev *dev = vq->dev;

 	if (!nvq->done_idx)
 		return;

-	vhost_add_used_and_signal_n(dev, vq, vq->heads, nvq->done_idx);
+	vhost_add_used_and_signal_n(dev, vq, vq->heads,
+				    vq->nheads, count);
 	nvq->done_idx = 0;
 }

@@ -468,13 +472,20 @@ static void vhost_tx_batch(struct vhost_net *net,
 			   struct socket *sock,
 			   struct msghdr *msghdr)
 {
+	struct vhost_virtqueue *vq = &nvq->vq;
+	bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
 	struct tun_msg_ctl ctl = {
 		.type = TUN_MSG_PTR,
 		.num = nvq->batched_xdp,
 		.ptr = nvq->xdp,
 	};
 	int i, err;

+	if (in_order) {
+		vq->heads[0].len = 0;
+		vq->nheads[0] = nvq->done_idx;
+	}
+
 	if (nvq->batched_xdp == 0)
 		goto signal_used;

@@ -496,7 +507,7 @@ static void vhost_tx_batch(struct vhost_net *net,
 	}

 signal_used:
-	vhost_net_signal_used(nvq);
+	vhost_net_signal_used(nvq, in_order ? 1 : nvq->done_idx);
 	nvq->batched_xdp = 0;
 }

@@ -750,6 +761,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
 	int sent_pkts = 0;
 	bool sock_can_batch = (sock->sk->sk_sndbuf == INT_MAX);
 	bool busyloop_intr;
+	bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);

 	do {
 		busyloop_intr = false;
@@ -786,11 +798,13 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
 				break;
 			}

-			/* We can't build XDP buff, go for single
-			 * packet path but let's flush batched
-			 * packets.
-			 */
-			vhost_tx_batch(net, nvq, sock, &msg);
+			if (nvq->batched_xdp) {
+				/* We can't build XDP buff, go for single
+				 * packet path but let's flush batched
+				 * packets.
+				 */
+				vhost_tx_batch(net, nvq, sock, &msg);
+			}
 			msg.msg_control = NULL;
 		} else {
 			if (tx_can_batch(vq, total_len))
@@ -811,8 +825,12 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
 			pr_debug("Truncated TX packet: len %d != %zd\n",
 				 err, len);
 done:
-		vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
-		vq->heads[nvq->done_idx].len = 0;
+		if (in_order) {
+			vq->heads[0].id = cpu_to_vhost32(vq, head);
+		} else {
+			vq->heads[nvq->done_idx].id = cpu_to_vhost32(vq, head);
+			vq->heads[nvq->done_idx].len = 0;
+		}
 		++nvq->done_idx;
 	} while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len)));

@@ -991,7 +1009,7 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
 }

 static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,
-				      bool *busyloop_intr)
+				      bool *busyloop_intr, unsigned int count)
 {
 	struct vhost_net_virtqueue *rnvq = &net->vqs[VHOST_NET_VQ_RX];
 	struct vhost_net_virtqueue *tnvq = &net->vqs[VHOST_NET_VQ_TX];
@@ -1001,7 +1019,7 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,

 	if (!len && rvq->busyloop_timeout) {
 		/* Flush batched heads first */
-		vhost_net_signal_used(rnvq);
+		vhost_net_signal_used(rnvq, count);
 		/* Both tx vq and rx socket were polled here */
 		vhost_net_busy_poll(net, rvq, tvq, busyloop_intr, true);

@@ -1013,22 +1031,25 @@ static int vhost_net_rx_peek_head_len(struct vhost_net *net, struct sock *sk,

 /* This is a multi-buffer version of vhost_get_desc, that works if
  * vq has read descriptors only.
- * @vq - the relevant virtqueue
+ * @nvq - the relevant vhost_net virtqueue
  * @datalen - data length we'll be reading
  * @iovcount - returned count of io vectors we fill
  * @log - vhost log
  * @log_num - log offset
  * @quota - headcount quota, 1 for big buffer
  * returns number of buffer heads allocated, negative on error
  */
-static int get_rx_bufs(struct vhost_virtqueue *vq,
+static int get_rx_bufs(struct vhost_net_virtqueue *nvq,
 		       struct vring_used_elem *heads,
+		       u16 *nheads,
 		       int datalen,
 		       unsigned *iovcount,
 		       struct vhost_log *log,
 		       unsigned *log_num,
 		       unsigned int quota)
 {
+	struct vhost_virtqueue *vq = &nvq->vq;
+	bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
 	unsigned int out, in;
 	int seg = 0;
 	int headcount = 0;
@@ -1065,14 +1086,16 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
 			nlogs += *log_num;
 			log += *log_num;
 		}
-		heads[headcount].id = cpu_to_vhost32(vq, d);
 		len = iov_length(vq->iov + seg, in);
-		heads[headcount].len = cpu_to_vhost32(vq, len);
-		datalen -= len;
+		if (!in_order) {
+			heads[headcount].id = cpu_to_vhost32(vq, d);
+			heads[headcount].len = cpu_to_vhost32(vq, len);
+		}
 		++headcount;
+		datalen -= len;
 		seg += in;
 	}
-	heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
+
 	*iovcount = seg;
 	if (unlikely(log))
 		*log_num = nlogs;
@@ -1082,6 +1105,15 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
 		r = UIO_MAXIOV + 1;
 		goto err;
 	}
+
+	if (!in_order)
+		heads[headcount - 1].len = cpu_to_vhost32(vq, len + datalen);
+	else {
+		heads[0].len = cpu_to_vhost32(vq, len + datalen);
+		heads[0].id = cpu_to_vhost32(vq, d);
+		nheads[0] = headcount;
+	}
+
 	return headcount;
 err:
 	vhost_discard_vq_desc(vq, headcount);
@@ -1094,6 +1126,8 @@ static void handle_rx(struct vhost_net *net)
 {
 	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX];
 	struct vhost_virtqueue *vq = &nvq->vq;
+	bool in_order = vhost_has_feature(vq, VIRTIO_F_IN_ORDER);
+	unsigned int count = 0;
 	unsigned in, log;
 	struct vhost_log *vq_log;
 	struct msghdr msg = {
@@ -1141,12 +1175,13 @@ static void handle_rx(struct vhost_net *net)

 	do {
 		sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
-						      &busyloop_intr);
+						      &busyloop_intr, count);
 		if (!sock_len)
 			break;
 		sock_len += sock_hlen;
 		vhost_len = sock_len + vhost_hlen;
-		headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx,
+		headcount = get_rx_bufs(nvq, vq->heads + count,
+					vq->nheads + count,
 					vhost_len, &in, vq_log, &log,
 					likely(mergeable) ? UIO_MAXIOV : 1);
 		/* On error, stop handling until the next kick. */
@@ -1222,8 +1257,11 @@ static void handle_rx(struct vhost_net *net)
 			goto out;
 		}
 		nvq->done_idx += headcount;
-		if (nvq->done_idx > VHOST_NET_BATCH)
-			vhost_net_signal_used(nvq);
+		count += in_order ? 1 : headcount;
+		if (nvq->done_idx > VHOST_NET_BATCH) {
+			vhost_net_signal_used(nvq, count);
+			count = 0;
+		}
 		if (unlikely(vq_log))
 			vhost_log_write(vq, vq_log, log, vhost_len,
 					vq->iov, in);
@@ -1235,7 +1273,7 @@ static void handle_rx(struct vhost_net *net)
 	else if (!sock_len)
 		vhost_net_enable_vq(net, vq);
 out:
-	vhost_net_signal_used(nvq);
+	vhost_net_signal_used(nvq, count);
 	mutex_unlock(&vq->mutex);
 }

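The gist of the bookkeeping change, as a minimal standalone sketch (not the kernel code; struct used_elem, BATCH, and the demo values are stand-ins for illustration only): without VIRTIO_F_IN_ORDER each completed buffer gets its own used element, while with the feature negotiated a single element plus a per-element count (the new nheads array) can cover a whole in-order batch, which is why vhost_net_signal_used() and get_rx_bufs() now carry an explicit count.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct used_elem {                      /* stand-in for struct vring_used_elem */
	uint32_t id;
	uint32_t len;
};

#define BATCH 4                         /* pretend four buffers complete */

int main(void)
{
	struct used_elem heads[BATCH];  /* used-element slots */
	uint16_t nheads[BATCH];         /* buffers covered per slot (in-order only) */
	bool in_order = true;           /* pretend VIRTIO_F_IN_ORDER was negotiated */
	unsigned int done_idx = 0;      /* buffers completed but not yet flushed */
	unsigned int count = 0;         /* slots that actually need flushing */

	for (uint32_t head = 0; head < BATCH; head++) {
		if (in_order) {
			/* One slot describes the whole in-order run:
			 * remember the last head and grow the run length.
			 */
			heads[0].id = head;
			heads[0].len = 0;
			nheads[0] = (uint16_t)++done_idx;
			count = 1;
		} else {
			/* One slot per completed buffer, as before.
			 * (nheads is set here only for the printout;
			 * it is not consulted in this case.)
			 */
			heads[done_idx].id = head;
			heads[done_idx].len = 0;
			nheads[done_idx] = 1;
			done_idx++;
			count = done_idx;
		}
	}

	printf("flushing %u used element(s) for %u completed buffer(s)\n",
	       count, done_idx);
	for (unsigned int i = 0; i < count; i++)
		printf("  elem %u: id=%u len=%u covers %u buffer(s)\n", i,
		       (unsigned)heads[i].id, (unsigned)heads[i].len,
		       (unsigned)nheads[i]);
	return 0;
}

Run as-is this prints one used element covering four buffers; flipping in_order to false prints four separate elements, mirroring why the callers in the diff above now pass a count of either 1 (in-order) or nvq->done_idx.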