@@ -51,14 +51,10 @@ struct sifive_fu540_macb_mgmt {
 #define DEFAULT_RX_RING_SIZE	512 /* must be power of 2 */
 #define MIN_RX_RING_SIZE	64
 #define MAX_RX_RING_SIZE	8192
-#define RX_RING_BYTES(bp)	(macb_dma_desc_get_size(bp)	\
-				 * (bp)->rx_ring_size)
 
 #define DEFAULT_TX_RING_SIZE	512 /* must be power of 2 */
 #define MIN_TX_RING_SIZE	64
 #define MAX_TX_RING_SIZE	4096
-#define TX_RING_BYTES(bp)	(macb_dma_desc_get_size(bp)	\
-				 * (bp)->tx_ring_size)
 
 /* level of occupied TX descriptors under which we wake up TX process */
 #define MACB_TX_WAKEUP_THRESH(bp)	(3 * (bp)->tx_ring_size / 4)
@@ -278,9 +274,9 @@ static void macb_set_hwaddr(struct macb *bp)
 	u32 bottom;
 	u16 top;
 
-	bottom = cpu_to_le32(*((u32 *)bp->dev->dev_addr));
+	bottom = get_unaligned_le32(bp->dev->dev_addr);
 	macb_or_gem_writel(bp, SA1B, bottom);
-	top = cpu_to_le16(*((u16 *)(bp->dev->dev_addr + 4)));
+	top = get_unaligned_le16(bp->dev->dev_addr + 4);
 	macb_or_gem_writel(bp, SA1T, top);
 
 	if (gem_has_ptp(bp)) {
@@ -495,19 +491,19 @@ static void macb_init_buffers(struct macb *bp)
 	struct macb_queue *queue;
 	unsigned int q;
 
-	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
-		queue_writel(queue, RBQP, lower_32_bits(queue->rx_ring_dma));
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-		if (bp->hw_dma_cap & HW_DMA_CAP_64B)
-			queue_writel(queue, RBQPH,
-				     upper_32_bits(queue->rx_ring_dma));
+	/* Single register for all queues' high 32 bits. */
+	if (bp->hw_dma_cap & HW_DMA_CAP_64B) {
+		macb_writel(bp, RBQPH,
+			    upper_32_bits(bp->queues[0].rx_ring_dma));
+		macb_writel(bp, TBQPH,
+			    upper_32_bits(bp->queues[0].tx_ring_dma));
+	}
 #endif
+
+	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
+		queue_writel(queue, RBQP, lower_32_bits(queue->rx_ring_dma));
 		queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma));
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-		if (bp->hw_dma_cap & HW_DMA_CAP_64B)
-			queue_writel(queue, TBQPH,
-				     upper_32_bits(queue->tx_ring_dma));
-#endif
 	}
 }
 
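The hunk above relies on RBQPH/TBQPH being single, controller-wide registers rather than per-queue ones: with 64-bit descriptor addressing, the controller pairs that shared high word with each queue's low-word base register, so every queue's ring must sit in the same 4 GiB window. A minimal illustrative sketch of that address composition follows; it is not driver code, and the names are invented for the example.

/*
 * Illustrative only: a shared 32-bit "high" register plus a per-queue
 * 32-bit "low" register form the 64-bit ring base the controller uses
 * (see RBQPH/RBQP in the hunk above). Names are made up for the sketch.
 */
#include <stdint.h>

static uint64_t ring_base(uint32_t shared_high, uint32_t per_queue_low)
{
	return ((uint64_t)shared_high << 32) | per_queue_low;
}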
@@ -1166,10 +1162,6 @@ static void macb_tx_error_task(struct work_struct *work)
 
 	/* Reinitialize the TX desc queue */
 	queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma));
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-	if (bp->hw_dma_cap & HW_DMA_CAP_64B)
-		queue_writel(queue, TBQPH, upper_32_bits(queue->tx_ring_dma));
-#endif
 	/* Make TX ring reflect state of hardware */
 	queue->tx_head = 0;
 	queue->tx_tail = 0;
@@ -2474,35 +2466,42 @@ static void macb_free_rx_buffers(struct macb *bp)
 	}
 }
 
+static unsigned int macb_tx_ring_size_per_queue(struct macb *bp)
+{
+	return macb_dma_desc_get_size(bp) * bp->tx_ring_size + bp->tx_bd_rd_prefetch;
+}
+
+static unsigned int macb_rx_ring_size_per_queue(struct macb *bp)
+{
+	return macb_dma_desc_get_size(bp) * bp->rx_ring_size + bp->rx_bd_rd_prefetch;
+}
+
 static void macb_free_consistent(struct macb *bp)
 {
+	struct device *dev = &bp->pdev->dev;
 	struct macb_queue *queue;
 	unsigned int q;
-	int size;
+	size_t size;
 
 	if (bp->rx_ring_tieoff) {
-		dma_free_coherent(&bp->pdev->dev, macb_dma_desc_get_size(bp),
+		dma_free_coherent(dev, macb_dma_desc_get_size(bp),
 				  bp->rx_ring_tieoff, bp->rx_ring_tieoff_dma);
 		bp->rx_ring_tieoff = NULL;
 	}
 
 	bp->macbgem_ops.mog_free_rx_buffers(bp);
 
+	size = bp->num_queues * macb_tx_ring_size_per_queue(bp);
+	dma_free_coherent(dev, size, bp->queues[0].tx_ring, bp->queues[0].tx_ring_dma);
+
+	size = bp->num_queues * macb_rx_ring_size_per_queue(bp);
+	dma_free_coherent(dev, size, bp->queues[0].rx_ring, bp->queues[0].rx_ring_dma);
+
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
 		kfree(queue->tx_skb);
 		queue->tx_skb = NULL;
-		if (queue->tx_ring) {
-			size = TX_RING_BYTES(bp) + bp->tx_bd_rd_prefetch;
-			dma_free_coherent(&bp->pdev->dev, size,
-					  queue->tx_ring, queue->tx_ring_dma);
-			queue->tx_ring = NULL;
-		}
-		if (queue->rx_ring) {
-			size = RX_RING_BYTES(bp) + bp->rx_bd_rd_prefetch;
-			dma_free_coherent(&bp->pdev->dev, size,
-					  queue->rx_ring, queue->rx_ring_dma);
-			queue->rx_ring = NULL;
-		}
+		queue->tx_ring = NULL;
+		queue->rx_ring = NULL;
 	}
 }
 
@@ -2544,35 +2543,45 @@ static int macb_alloc_rx_buffers(struct macb *bp)
 
 static int macb_alloc_consistent(struct macb *bp)
 {
+	struct device *dev = &bp->pdev->dev;
+	dma_addr_t tx_dma, rx_dma;
 	struct macb_queue *queue;
 	unsigned int q;
-	int size;
+	void *tx, *rx;
+	size_t size;
+
+	/*
+	 * Upper 32-bits of Tx/Rx DMA descriptors for each queue must match!
+	 * We cannot enforce this guarantee, the best we can do is do a single
+	 * allocation and hope it will land into alloc_pages() that guarantees
+	 * natural alignment of physical addresses.
+	 */
+
+	size = bp->num_queues * macb_tx_ring_size_per_queue(bp);
+	tx = dma_alloc_coherent(dev, size, &tx_dma, GFP_KERNEL);
+	if (!tx || upper_32_bits(tx_dma) != upper_32_bits(tx_dma + size - 1))
+		goto out_err;
+	netdev_dbg(bp->dev, "Allocated %zu bytes for %u TX rings at %08lx (mapped %p)\n",
+		   size, bp->num_queues, (unsigned long)tx_dma, tx);
+
+	size = bp->num_queues * macb_rx_ring_size_per_queue(bp);
+	rx = dma_alloc_coherent(dev, size, &rx_dma, GFP_KERNEL);
+	if (!rx || upper_32_bits(rx_dma) != upper_32_bits(rx_dma + size - 1))
+		goto out_err;
+	netdev_dbg(bp->dev, "Allocated %zu bytes for %u RX rings at %08lx (mapped %p)\n",
+		   size, bp->num_queues, (unsigned long)rx_dma, rx);
 
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
-		size = TX_RING_BYTES(bp) + bp->tx_bd_rd_prefetch;
-		queue->tx_ring = dma_alloc_coherent(&bp->pdev->dev, size,
-						    &queue->tx_ring_dma,
-						    GFP_KERNEL);
-		if (!queue->tx_ring)
-			goto out_err;
-		netdev_dbg(bp->dev,
-			   "Allocated TX ring for queue %u of %d bytes at %08lx (mapped %p)\n",
-			   q, size, (unsigned long)queue->tx_ring_dma,
-			   queue->tx_ring);
+		queue->tx_ring = tx + macb_tx_ring_size_per_queue(bp) * q;
+		queue->tx_ring_dma = tx_dma + macb_tx_ring_size_per_queue(bp) * q;
+
+		queue->rx_ring = rx + macb_rx_ring_size_per_queue(bp) * q;
+		queue->rx_ring_dma = rx_dma + macb_rx_ring_size_per_queue(bp) * q;
 
 		size = bp->tx_ring_size * sizeof(struct macb_tx_skb);
 		queue->tx_skb = kmalloc(size, GFP_KERNEL);
 		if (!queue->tx_skb)
 			goto out_err;
-
-		size = RX_RING_BYTES(bp) + bp->rx_bd_rd_prefetch;
-		queue->rx_ring = dma_alloc_coherent(&bp->pdev->dev, size,
-						    &queue->rx_ring_dma, GFP_KERNEL);
-		if (!queue->rx_ring)
-			goto out_err;
-		netdev_dbg(bp->dev,
-			   "Allocated RX ring of %d bytes at %08lx (mapped %p)\n",
-			   size, (unsigned long)queue->rx_ring_dma, queue->rx_ring);
 	}
 	if (bp->macbgem_ops.mog_alloc_rx_buffers(bp))
 		goto out_err;
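The comment in macb_alloc_consistent() states the key constraint: every ring sliced out of the single allocation must share the same upper 32 address bits, which the code enforces by comparing upper_32_bits() of the first and last byte of the allocation and bailing out if they differ. Below is a small stand-alone sketch of that guard; it is an assumption-labelled illustration, not driver code, with hi32() standing in for upper_32_bits().

/*
 * Sketch of the 4 GiB-window guard: an allocation is only usable when
 * its first and last byte share the same upper 32 address bits, so all
 * per-queue rings carved from it agree with the shared high register.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static inline uint32_t hi32(uint64_t addr)
{
	return (uint32_t)(addr >> 32);
}

static bool rings_fit_one_window(uint64_t dma_base, size_t total_size)
{
	return hi32(dma_base) == hi32(dma_base + total_size - 1);
}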
@@ -4309,12 +4318,6 @@ static int macb_init(struct platform_device *pdev)
 			queue->TBQP = GEM_TBQP(hw_q - 1);
 			queue->RBQP = GEM_RBQP(hw_q - 1);
 			queue->RBQS = GEM_RBQS(hw_q - 1);
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-			if (bp->hw_dma_cap & HW_DMA_CAP_64B) {
-				queue->TBQPH = GEM_TBQPH(hw_q - 1);
-				queue->RBQPH = GEM_RBQPH(hw_q - 1);
-			}
-#endif
 		} else {
 			/* queue0 uses legacy registers */
 			queue->ISR = MACB_ISR;
@@ -4323,12 +4326,6 @@ static int macb_init(struct platform_device *pdev)
 			queue->IMR = MACB_IMR;
 			queue->TBQP = MACB_TBQP;
 			queue->RBQP = MACB_RBQP;
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-			if (bp->hw_dma_cap & HW_DMA_CAP_64B) {
-				queue->TBQPH = MACB_TBQPH;
-				queue->RBQPH = MACB_RBQPH;
-			}
-#endif
 		}
 
 		/* get irq: here we use the linux queue index, not the hardware
@@ -5452,6 +5449,11 @@ static int __maybe_unused macb_suspend(struct device *dev)
 		 */
 		tmp = macb_readl(bp, NCR);
 		macb_writel(bp, NCR, tmp & ~(MACB_BIT(TE) | MACB_BIT(RE)));
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+		if (!(bp->caps & MACB_CAPS_QUEUE_DISABLE))
+			macb_writel(bp, RBQPH,
+				    upper_32_bits(bp->rx_ring_tieoff_dma));
+#endif
 		for (q = 0, queue = bp->queues; q < bp->num_queues;
 		     ++q, ++queue) {
 			/* Disable RX queues */
@@ -5461,10 +5463,6 @@ static int __maybe_unused macb_suspend(struct device *dev)
 				/* Tie off RX queues */
 				queue_writel(queue, RBQP,
 					     lower_32_bits(bp->rx_ring_tieoff_dma));
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-				queue_writel(queue, RBQPH,
-					     upper_32_bits(bp->rx_ring_tieoff_dma));
-#endif
 			}
 			/* Disable all interrupts */
 			queue_writel(queue, IDR, -1);