Skip to content

Commit 3dc5d44

Browse files
tanstafelPaolo Abeni
authored andcommitted
net: ks8851: Fix TX stall caused by TX buffer overrun
There is a bug in the ks8851 Ethernet driver that more data is written to the hardware TX buffer than actually available. This is caused by wrong accounting of the free TX buffer space. The driver maintains a tx_space variable that represents the TX buffer space that is deemed to be free. The ks8851_start_xmit_spi() function adds an SKB to a queue if tx_space is large enough and reduces tx_space by the amount of buffer space it will later need in the TX buffer and then schedules a work item. If there is not enough space then the TX queue is stopped. The worker function ks8851_tx_work() dequeues all the SKBs and writes the data into the hardware TX buffer. The last packet will trigger an interrupt after it was send. Here it is assumed that all data fits into the TX buffer. In the interrupt routine (which runs asynchronously because it is a threaded interrupt) tx_space is updated with the current value from the hardware. Also the TX queue is woken up again. Now it could happen that after data was sent to the hardware and before handling the TX interrupt new data is queued in ks8851_start_xmit_spi() when the TX buffer space had still some space left. When the interrupt is actually handled tx_space is updated from the hardware but now we already have new SKBs queued that have not been written to the hardware TX buffer yet. Since tx_space has been overwritten by the value from the hardware the space is not accounted for. Now we have more data queued then buffer space available in the hardware and ks8851_tx_work() will potentially overrun the hardware TX buffer. In many cases it will still work because often the buffer is written out fast enough so that no overrun occurs but for example if the peer throttles us via flow control then an overrun may happen. This can be fixed in different ways. The most simple way would be to set tx_space to 0 before writing data to the hardware TX buffer preventing the queuing of more SKBs until the TX interrupt has been handled. I have chosen a slightly more efficient (and still rather simple) way and track the amount of data that is already queued and not yet written to the hardware. When new SKBs are to be queued the already queued amount of data is honoured when checking free TX buffer space. I tested this with a setup of two linked KS8851 running iperf3 between the two in bidirectional mode. Before the fix I got a stall after some minutes. With the fix I saw now issues anymore after hours. Fixes: 3ba81f3 ("net: Micrel KS8851 SPI network driver") Cc: "David S. Miller" <[email protected]> Cc: Eric Dumazet <[email protected]> Cc: Jakub Kicinski <[email protected]> Cc: Paolo Abeni <[email protected]> Cc: Ben Dooks <[email protected]> Cc: Tristram Ha <[email protected]> Cc: [email protected] Cc: [email protected] # 5.10+ Signed-off-by: Ronald Wahl <[email protected]> Reviewed-by: Simon Horman <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Paolo Abeni <[email protected]>
1 parent 979e901 commit 3dc5d44

File tree

3 files changed

+40
-25
lines changed

3 files changed

+40
-25
lines changed

drivers/net/ethernet/micrel/ks8851.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,8 @@ union ks8851_tx_hdr {
350350
* @rxd: Space for receiving SPI data, in DMA-able space.
351351
* @txd: Space for transmitting SPI data, in DMA-able space.
352352
* @msg_enable: The message flags controlling driver output (see ethtool).
353+
* @tx_space: Free space in the hardware TX buffer (cached copy of KS_TXMIR).
354+
* @queued_len: Space required in hardware TX buffer for queued packets in txq.
353355
* @fid: Incrementing frame id tag.
354356
* @rc_ier: Cached copy of KS_IER.
355357
* @rc_ccr: Cached copy of KS_CCR.
@@ -399,6 +401,7 @@ struct ks8851_net {
399401
struct work_struct rxctrl_work;
400402

401403
struct sk_buff_head txq;
404+
unsigned int queued_len;
402405

403406
struct eeprom_93cx6 eeprom;
404407
struct regulator *vdd_reg;

drivers/net/ethernet/micrel/ks8851_common.c

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -362,16 +362,18 @@ static irqreturn_t ks8851_irq(int irq, void *_ks)
362362
handled |= IRQ_RXPSI;
363363

364364
if (status & IRQ_TXI) {
365-
handled |= IRQ_TXI;
365+
unsigned short tx_space = ks8851_rdreg16(ks, KS_TXMIR);
366366

367-
/* no lock here, tx queue should have been stopped */
367+
netif_dbg(ks, intr, ks->netdev,
368+
"%s: txspace %d\n", __func__, tx_space);
368369

369-
/* update our idea of how much tx space is available to the
370-
* system */
371-
ks->tx_space = ks8851_rdreg16(ks, KS_TXMIR);
370+
spin_lock(&ks->statelock);
371+
ks->tx_space = tx_space;
372+
if (netif_queue_stopped(ks->netdev))
373+
netif_wake_queue(ks->netdev);
374+
spin_unlock(&ks->statelock);
372375

373-
netif_dbg(ks, intr, ks->netdev,
374-
"%s: txspace %d\n", __func__, ks->tx_space);
376+
handled |= IRQ_TXI;
375377
}
376378

377379
if (status & IRQ_RXI)
@@ -414,9 +416,6 @@ static irqreturn_t ks8851_irq(int irq, void *_ks)
414416
if (status & IRQ_LCI)
415417
mii_check_link(&ks->mii);
416418

417-
if (status & IRQ_TXI)
418-
netif_wake_queue(ks->netdev);
419-
420419
return IRQ_HANDLED;
421420
}
422421

@@ -500,6 +499,7 @@ static int ks8851_net_open(struct net_device *dev)
500499
ks8851_wrreg16(ks, KS_ISR, ks->rc_ier);
501500
ks8851_wrreg16(ks, KS_IER, ks->rc_ier);
502501

502+
ks->queued_len = 0;
503503
netif_start_queue(ks->netdev);
504504

505505
netif_dbg(ks, ifup, ks->netdev, "network device up\n");

drivers/net/ethernet/micrel/ks8851_spi.c

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,18 @@ static void ks8851_wrfifo_spi(struct ks8851_net *ks, struct sk_buff *txp,
286286
netdev_err(ks->netdev, "%s: spi_sync() failed\n", __func__);
287287
}
288288

289+
/**
290+
* calc_txlen - calculate size of message to send packet
291+
* @len: Length of data
292+
*
293+
* Returns the size of the TXFIFO message needed to send
294+
* this packet.
295+
*/
296+
static unsigned int calc_txlen(unsigned int len)
297+
{
298+
return ALIGN(len + 4, 4);
299+
}
300+
289301
/**
290302
* ks8851_rx_skb_spi - receive skbuff
291303
* @ks: The device state
@@ -305,7 +317,9 @@ static void ks8851_rx_skb_spi(struct ks8851_net *ks, struct sk_buff *skb)
305317
*/
306318
static void ks8851_tx_work(struct work_struct *work)
307319
{
320+
unsigned int dequeued_len = 0;
308321
struct ks8851_net_spi *kss;
322+
unsigned short tx_space;
309323
struct ks8851_net *ks;
310324
unsigned long flags;
311325
struct sk_buff *txb;
@@ -322,6 +336,8 @@ static void ks8851_tx_work(struct work_struct *work)
322336
last = skb_queue_empty(&ks->txq);
323337

324338
if (txb) {
339+
dequeued_len += calc_txlen(txb->len);
340+
325341
ks8851_wrreg16_spi(ks, KS_RXQCR,
326342
ks->rc_rxqcr | RXQCR_SDA);
327343
ks8851_wrfifo_spi(ks, txb, last);
@@ -332,6 +348,13 @@ static void ks8851_tx_work(struct work_struct *work)
332348
}
333349
}
334350

351+
tx_space = ks8851_rdreg16_spi(ks, KS_TXMIR);
352+
353+
spin_lock(&ks->statelock);
354+
ks->queued_len -= dequeued_len;
355+
ks->tx_space = tx_space;
356+
spin_unlock(&ks->statelock);
357+
335358
ks8851_unlock_spi(ks, &flags);
336359
}
337360

@@ -346,18 +369,6 @@ static void ks8851_flush_tx_work_spi(struct ks8851_net *ks)
346369
flush_work(&kss->tx_work);
347370
}
348371

349-
/**
350-
* calc_txlen - calculate size of message to send packet
351-
* @len: Length of data
352-
*
353-
* Returns the size of the TXFIFO message needed to send
354-
* this packet.
355-
*/
356-
static unsigned int calc_txlen(unsigned int len)
357-
{
358-
return ALIGN(len + 4, 4);
359-
}
360-
361372
/**
362373
* ks8851_start_xmit_spi - transmit packet using SPI
363374
* @skb: The buffer to transmit
@@ -386,16 +397,17 @@ static netdev_tx_t ks8851_start_xmit_spi(struct sk_buff *skb,
386397

387398
spin_lock(&ks->statelock);
388399

389-
if (needed > ks->tx_space) {
400+
if (ks->queued_len + needed > ks->tx_space) {
390401
netif_stop_queue(dev);
391402
ret = NETDEV_TX_BUSY;
392403
} else {
393-
ks->tx_space -= needed;
404+
ks->queued_len += needed;
394405
skb_queue_tail(&ks->txq, skb);
395406
}
396407

397408
spin_unlock(&ks->statelock);
398-
schedule_work(&kss->tx_work);
409+
if (ret == NETDEV_TX_OK)
410+
schedule_work(&kss->tx_work);
399411

400412
return ret;
401413
}

0 commit comments

Comments
 (0)