22
22
#include <linux/interrupt.h>
23
23
#include <linux/firmware.h>
24
24
#include <linux/fs.h>
25
+ #include <asm/atomic.h>
25
26
#include <uapi/linux/filter.h>
26
27
#include <init.h>
27
28
#include <irq_kern.h>
@@ -102,18 +103,33 @@ static const struct {
102
103
103
104
static void vector_reset_stats (struct vector_private * vp )
104
105
{
106
+ /* We reuse the existing queue locks for stats */
107
+
108
+ /* RX stats are modified with RX head_lock held
109
+ * in vector_poll.
110
+ */
111
+
112
+ spin_lock (& vp -> rx_queue -> head_lock );
105
113
vp -> estats .rx_queue_max = 0 ;
106
114
vp -> estats .rx_queue_running_average = 0 ;
107
- vp -> estats .tx_queue_max = 0 ;
108
- vp -> estats .tx_queue_running_average = 0 ;
109
115
vp -> estats .rx_encaps_errors = 0 ;
116
+ vp -> estats .sg_ok = 0 ;
117
+ vp -> estats .sg_linearized = 0 ;
118
+ spin_unlock (& vp -> rx_queue -> head_lock );
119
+
120
+ /* TX stats are modified with TX head_lock held
121
+ * in vector_send.
122
+ */
123
+
124
+ spin_lock (& vp -> tx_queue -> head_lock );
110
125
vp -> estats .tx_timeout_count = 0 ;
111
126
vp -> estats .tx_restart_queue = 0 ;
112
127
vp -> estats .tx_kicks = 0 ;
113
128
vp -> estats .tx_flow_control_xon = 0 ;
114
129
vp -> estats .tx_flow_control_xoff = 0 ;
115
- vp -> estats .sg_ok = 0 ;
116
- vp -> estats .sg_linearized = 0 ;
130
+ vp -> estats .tx_queue_max = 0 ;
131
+ vp -> estats .tx_queue_running_average = 0 ;
132
+ spin_unlock (& vp -> tx_queue -> head_lock );
117
133
}
118
134
119
135
static int get_mtu (struct arglist * def )
@@ -232,12 +248,6 @@ static int get_transport_options(struct arglist *def)
232
248
233
249
static char * drop_buffer ;
234
250
235
- /* Array backed queues optimized for bulk enqueue/dequeue and
236
- * 1:N (small values of N) or 1:1 enqueuer/dequeuer ratios.
237
- * For more details and full design rationale see
238
- * http://foswiki.cambridgegreys.com/Main/EatYourTailAndEnjoyIt
239
- */
240
-
241
251
242
252
/*
243
253
* Advance the mmsg queue head by n = advance. Resets the queue to
@@ -247,27 +257,13 @@ static char *drop_buffer;
247
257
248
258
static int vector_advancehead (struct vector_queue * qi , int advance )
249
259
{
250
- int queue_depth ;
251
-
252
260
qi -> head =
253
261
(qi -> head + advance )
254
262
% qi -> max_depth ;
255
263
256
264
257
- spin_lock (& qi -> tail_lock );
258
- qi -> queue_depth -= advance ;
259
-
260
- /* we are at 0, use this to
261
- * reset head and tail so we can use max size vectors
262
- */
263
-
264
- if (qi -> queue_depth == 0 ) {
265
- qi -> head = 0 ;
266
- qi -> tail = 0 ;
267
- }
268
- queue_depth = qi -> queue_depth ;
269
- spin_unlock (& qi -> tail_lock );
270
- return queue_depth ;
265
+ atomic_sub (advance , & qi -> queue_depth );
266
+ return atomic_read (& qi -> queue_depth );
271
267
}
272
268
273
269
/* Advance the queue tail by n = advance.
@@ -277,16 +273,11 @@ static int vector_advancehead(struct vector_queue *qi, int advance)
277
273
278
274
static int vector_advancetail (struct vector_queue * qi , int advance )
279
275
{
280
- int queue_depth ;
281
-
282
276
qi -> tail =
283
277
(qi -> tail + advance )
284
278
% qi -> max_depth ;
285
- spin_lock (& qi -> head_lock );
286
- qi -> queue_depth += advance ;
287
- queue_depth = qi -> queue_depth ;
288
- spin_unlock (& qi -> head_lock );
289
- return queue_depth ;
279
+ atomic_add (advance , & qi -> queue_depth );
280
+ return atomic_read (& qi -> queue_depth );
290
281
}
291
282
292
283
static int prep_msg (struct vector_private * vp ,
@@ -339,9 +330,7 @@ static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb)
339
330
int iov_count ;
340
331
341
332
spin_lock (& qi -> tail_lock );
342
- spin_lock (& qi -> head_lock );
343
- queue_depth = qi -> queue_depth ;
344
- spin_unlock (& qi -> head_lock );
333
+ queue_depth = atomic_read (& qi -> queue_depth );
345
334
346
335
if (skb )
347
336
packet_len = skb -> len ;
@@ -360,6 +349,7 @@ static int vector_enqueue(struct vector_queue *qi, struct sk_buff *skb)
360
349
mmsg_vector -> msg_hdr .msg_iovlen = iov_count ;
361
350
mmsg_vector -> msg_hdr .msg_name = vp -> fds -> remote_addr ;
362
351
mmsg_vector -> msg_hdr .msg_namelen = vp -> fds -> remote_addr_size ;
352
+ wmb (); /* Make the packet visible to the NAPI poll thread */
363
353
queue_depth = vector_advancetail (qi , 1 );
364
354
} else
365
355
goto drop ;
@@ -398,7 +388,7 @@ static int consume_vector_skbs(struct vector_queue *qi, int count)
398
388
}
399
389
400
390
/*
401
- * Generic vector deque via sendmmsg with support for forming headers
391
+ * Generic vector dequeue via sendmmsg with support for forming headers
402
392
* using transport specific callback. Allows GRE, L2TPv3, RAW and
403
393
* other transports to use a common dequeue procedure in vector mode
404
394
*/
@@ -408,69 +398,64 @@ static int vector_send(struct vector_queue *qi)
408
398
{
409
399
struct vector_private * vp = netdev_priv (qi -> dev );
410
400
struct mmsghdr * send_from ;
411
- int result = 0 , send_len , queue_depth = qi -> max_depth ;
401
+ int result = 0 , send_len ;
412
402
413
403
if (spin_trylock (& qi -> head_lock )) {
414
- if (spin_trylock (& qi -> tail_lock )) {
415
- /* update queue_depth to current value */
416
- queue_depth = qi -> queue_depth ;
417
- spin_unlock (& qi -> tail_lock );
418
- while (queue_depth > 0 ) {
419
- /* Calculate the start of the vector */
420
- send_len = queue_depth ;
421
- send_from = qi -> mmsg_vector ;
422
- send_from += qi -> head ;
423
- /* Adjust vector size if wraparound */
424
- if (send_len + qi -> head > qi -> max_depth )
425
- send_len = qi -> max_depth - qi -> head ;
426
- /* Try to TX as many packets as possible */
427
- if (send_len > 0 ) {
428
- result = uml_vector_sendmmsg (
429
- vp -> fds -> tx_fd ,
430
- send_from ,
431
- send_len ,
432
- 0
433
- );
434
- vp -> in_write_poll =
435
- (result != send_len );
436
- }
437
- /* For some of the sendmmsg error scenarios
438
- * we may end being unsure in the TX success
439
- * for all packets. It is safer to declare
440
- * them all TX-ed and blame the network.
441
- */
442
- if (result < 0 ) {
443
- if (net_ratelimit ())
444
- netdev_err (vp -> dev , "sendmmsg err=%i\n" ,
445
- result );
446
- vp -> in_error = true;
447
- result = send_len ;
448
- }
449
- if (result > 0 ) {
450
- queue_depth =
451
- consume_vector_skbs (qi , result );
452
- /* This is equivalent to an TX IRQ.
453
- * Restart the upper layers to feed us
454
- * more packets.
455
- */
456
- if (result > vp -> estats .tx_queue_max )
457
- vp -> estats .tx_queue_max = result ;
458
- vp -> estats .tx_queue_running_average =
459
- (vp -> estats .tx_queue_running_average + result ) >> 1 ;
460
- }
461
- netif_wake_queue (qi -> dev );
462
- /* if TX is busy, break out of the send loop,
463
- * poll write IRQ will reschedule xmit for us
404
+ /* queue_depth is re-read atomically on every pass of the loop */
405
+ while (atomic_read (& qi -> queue_depth ) > 0 ) {
406
+ /* Calculate the start of the vector */
407
+ send_len = atomic_read (& qi -> queue_depth );
408
+ send_from = qi -> mmsg_vector ;
409
+ send_from += qi -> head ;
410
+ /* Adjust vector size if wraparound */
411
+ if (send_len + qi -> head > qi -> max_depth )
412
+ send_len = qi -> max_depth - qi -> head ;
413
+ /* Try to TX as many packets as possible */
414
+ if (send_len > 0 ) {
415
+ result = uml_vector_sendmmsg (
416
+ vp -> fds -> tx_fd ,
417
+ send_from ,
418
+ send_len ,
419
+ 0
420
+ );
421
+ vp -> in_write_poll =
422
+ (result != send_len );
423
+ }
424
+ /* For some of the sendmmsg error scenarios
425
+ * we may end up unsure whether TX succeeded
426
+ * for all packets. It is safer to declare
427
+ * them all TX-ed and blame the network.
428
+ */
429
+ if (result < 0 ) {
430
+ if (net_ratelimit ())
431
+ netdev_err (vp -> dev , "sendmmsg err=%i\n" ,
432
+ result );
433
+ vp -> in_error = true;
434
+ result = send_len ;
435
+ }
436
+ if (result > 0 ) {
437
+ consume_vector_skbs (qi , result );
438
+ /* This is equivalent to a TX IRQ.
439
+ * Restart the upper layers to feed us
440
+ * more packets.
464
441
*/
465
- if (result != send_len ) {
466
- vp -> estats .tx_restart_queue ++ ;
467
- break ;
468
- }
442
+ if (result > vp -> estats .tx_queue_max )
443
+ vp -> estats .tx_queue_max = result ;
444
+ vp -> estats .tx_queue_running_average =
445
+ (vp -> estats .tx_queue_running_average + result ) >> 1 ;
446
+ }
447
+ netif_wake_queue (qi -> dev );
448
+ /* if TX is busy, break out of the send loop,
449
+ * poll write IRQ will reschedule xmit for us.
450
+ */
451
+ if (result != send_len ) {
452
+ vp -> estats .tx_restart_queue ++ ;
453
+ break ;
469
454
}
470
455
}
471
456
spin_unlock (& qi -> head_lock );
472
457
}
473
- return queue_depth ;
458
+ return atomic_read ( & qi -> queue_depth ) ;
474
459
}
475
460
476
461
/* Queue destructor. Deliberately stateless so we can use
@@ -589,7 +574,7 @@ static struct vector_queue *create_queue(
589
574
}
590
575
spin_lock_init (& result -> head_lock );
591
576
spin_lock_init (& result -> tail_lock );
592
- result -> queue_depth = 0 ;
577
+ atomic_set ( & result -> queue_depth , 0 ) ;
593
578
result -> head = 0 ;
594
579
result -> tail = 0 ;
595
580
return result ;
@@ -668,18 +653,27 @@ static struct sk_buff *prep_skb(
668
653
}
669
654
670
655
671
- /* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs*/
656
+ /* Prepare queue for recvmmsg one-shot rx - fill with fresh sk_buffs */
672
657
673
658
static void prep_queue_for_rx (struct vector_queue * qi )
674
659
{
675
660
struct vector_private * vp = netdev_priv (qi -> dev );
676
661
struct mmsghdr * mmsg_vector = qi -> mmsg_vector ;
677
662
void * * skbuff_vector = qi -> skbuff_vector ;
678
- int i ;
663
+ int i , queue_depth ;
664
+
665
+ queue_depth = atomic_read (& qi -> queue_depth );
679
666
680
- if (qi -> queue_depth == 0 )
667
+ if (queue_depth == 0 )
681
668
return ;
682
- for (i = 0 ; i < qi -> queue_depth ; i ++ ) {
669
+
670
+ /* RX is always emptied 100% during each cycle, so we do not
671
+ * have to do the tail wraparound math for it.
672
+ */
673
+
674
+ qi -> head = qi -> tail = 0 ;
675
+
676
+ for (i = 0 ; i < queue_depth ; i ++ ) {
683
677
/* it is OK if allocation fails - recvmmsg with NULL data in
684
678
* iov argument still performs an RX, just drops the packet.
685
679
* This allows us to stop faffing around with a "drop buffer"
@@ -689,7 +683,7 @@ static void prep_queue_for_rx(struct vector_queue *qi)
689
683
skbuff_vector ++ ;
690
684
mmsg_vector ++ ;
691
685
}
692
- qi -> queue_depth = 0 ;
686
+ atomic_set ( & qi -> queue_depth , 0 ) ;
693
687
}
694
688
695
689
static struct vector_device * find_device (int n )
@@ -985,7 +979,7 @@ static int vector_mmsg_rx(struct vector_private *vp, int budget)
985
979
* many do we need to prep the next time prep_queue_for_rx() is called.
986
980
*/
987
981
988
- qi -> queue_depth = packet_count ;
982
+ atomic_add ( packet_count , & qi -> queue_depth ) ;
989
983
990
984
for (i = 0 ; i < packet_count ; i ++ ) {
991
985
skb = (* skbuff_vector );
@@ -1172,13 +1166,15 @@ static int vector_poll(struct napi_struct *napi, int budget)
1172
1166
1173
1167
if ((vp -> options & VECTOR_TX ) != 0 )
1174
1168
tx_enqueued = (vector_send (vp -> tx_queue ) > 0 );
1169
+ spin_lock (& vp -> rx_queue -> head_lock );
1175
1170
if ((vp -> options & VECTOR_RX ) > 0 )
1176
1171
err = vector_mmsg_rx (vp , budget );
1177
1172
else {
1178
1173
err = vector_legacy_rx (vp );
1179
1174
if (err > 0 )
1180
1175
err = 1 ;
1181
1176
}
1177
+ spin_unlock (& vp -> rx_queue -> head_lock );
1182
1178
if (err > 0 )
1183
1179
work_done += err ;
1184
1180
@@ -1225,7 +1221,7 @@ static int vector_net_open(struct net_device *dev)
1225
1221
vp -> rx_header_size ,
1226
1222
MAX_IOV_SIZE
1227
1223
);
1228
- vp -> rx_queue -> queue_depth = get_depth (vp -> parsed );
1224
+ atomic_set ( & vp -> rx_queue -> queue_depth , get_depth (vp -> parsed ) );
1229
1225
} else {
1230
1226
vp -> header_rxbuffer = kmalloc (
1231
1227
vp -> rx_header_size ,
@@ -1467,7 +1463,17 @@ static void vector_get_ethtool_stats(struct net_device *dev,
1467
1463
{
1468
1464
struct vector_private * vp = netdev_priv (dev );
1469
1465
1466
+ /* Stats are modified in the dequeue portions of
1467
+ * rx/tx, which are protected by the head locks;
1468
+ * grabbing these locks here ensures they are up
1469
+ * to date.
1470
+ */
1471
+
1472
+ spin_lock (& vp -> tx_queue -> head_lock );
1473
+ spin_lock (& vp -> rx_queue -> head_lock );
1470
1474
memcpy (tmp_stats , & vp -> estats , sizeof (struct vector_estats ));
1475
+ spin_unlock (& vp -> rx_queue -> head_lock );
1476
+ spin_unlock (& vp -> tx_queue -> head_lock );
1471
1477
}
1472
1478
1473
1479
static int vector_get_coalesce (struct net_device * netdev ,
0 commit comments