
Commit 69797ff

Wei Fang authored and kuba-moo committed
net: enetc: add LSO support for i.MX95 ENETC PF
ENETC rev 4.1 supports large send offload (LSO), segmenting large TCP
and UDP transmit units into multiple Ethernet frames. To support LSO,
software needs to fill in some auxiliary information in the Tx BD, such
as the LSO header length, the frame length, and the LSO maximum segment
size.

At a 1Gbps link rate, TCP segmentation was tested using iperf3, and the
CPU utilization before and after applying the patch was compared using
the top command. LSO saves a significant number of CPU cycles compared
to software TSO.

Before applying the patch:
%Cpu(s): 0.1 us, 4.1 sy, 0.0 ni, 85.7 id, 0.0 wa, 0.5 hi, 9.7 si

After applying the patch:
%Cpu(s): 0.1 us, 2.3 sy, 0.0 ni, 94.5 id, 0.0 wa, 0.4 hi, 2.6 si

Signed-off-by: Wei Fang <[email protected]>
Reviewed-by: Frank Li <[email protected]>
Reviewed-by: Claudiu Manoil <[email protected]>
Link: https://patch.msgid.link/[email protected]
Signed-off-by: Jakub Kicinski <[email protected]>
1 parent 93c5d5a commit 69797ff

File tree

5 files changed: +310 -10 lines

drivers/net/ethernet/freescale/enetc/enetc.c

Lines changed: 257 additions & 7 deletions
@@ -532,6 +532,230 @@ static void enetc_tso_complete_csum(struct enetc_bdr *tx_ring, struct tso_t *tso
 	}
 }
 
+static int enetc_lso_count_descs(const struct sk_buff *skb)
+{
+	/* 4 BDs: 1 BD for the LSO header, 1 BD for the extended BD,
+	 * 1 BD for the linear area data not including the LSO header,
+	 * namely skb_headlen(skb) - lso_hdr_len (it may be 0, but that's
+	 * okay, we only need to consider the worst case), and 1 BD
+	 * for the gap.
+	 */
+	return skb_shinfo(skb)->nr_frags + 4;
+}
+
+static int enetc_lso_get_hdr_len(const struct sk_buff *skb)
+{
+	int hdr_len, tlen;
+
+	tlen = skb_is_gso_tcp(skb) ? tcp_hdrlen(skb) : sizeof(struct udphdr);
+	hdr_len = skb_transport_offset(skb) + tlen;
+
+	return hdr_len;
+}
+
+static void enetc_lso_start(struct sk_buff *skb, struct enetc_lso_t *lso)
+{
+	lso->lso_seg_size = skb_shinfo(skb)->gso_size;
+	lso->ipv6 = enetc_skb_is_ipv6(skb);
+	lso->tcp = skb_is_gso_tcp(skb);
+	lso->l3_hdr_len = skb_network_header_len(skb);
+	lso->l3_start = skb_network_offset(skb);
+	lso->hdr_len = enetc_lso_get_hdr_len(skb);
+	lso->total_len = skb->len - lso->hdr_len;
+}
+
+static void enetc_lso_map_hdr(struct enetc_bdr *tx_ring, struct sk_buff *skb,
+			      int *i, struct enetc_lso_t *lso)
+{
+	union enetc_tx_bd txbd_tmp, *txbd;
+	struct enetc_tx_swbd *tx_swbd;
+	u16 frm_len, frm_len_ext;
+	u8 flags, e_flags = 0;
+	dma_addr_t addr;
+	char *hdr;
+
+	/* Get the first BD of the LSO BDs chain */
+	txbd = ENETC_TXBD(*tx_ring, *i);
+	tx_swbd = &tx_ring->tx_swbd[*i];
+	prefetchw(txbd);
+
+	/* Prepare the LSO header: MAC + IP + TCP/UDP */
+	hdr = tx_ring->tso_headers + *i * TSO_HEADER_SIZE;
+	memcpy(hdr, skb->data, lso->hdr_len);
+	addr = tx_ring->tso_headers_dma + *i * TSO_HEADER_SIZE;
+
+	/* {frm_len_ext, frm_len} indicates the total length of the
+	 * large transmit data unit. frm_len contains the 16 least
+	 * significant bits and frm_len_ext contains the 4 most
+	 * significant bits.
+	 */
+	frm_len = lso->total_len & 0xffff;
+	frm_len_ext = (lso->total_len >> 16) & 0xf;
+
+	/* Set the flags of the first BD */
+	flags = ENETC_TXBD_FLAGS_EX | ENETC_TXBD_FLAGS_CSUM_LSO |
+		ENETC_TXBD_FLAGS_LSO | ENETC_TXBD_FLAGS_L4CS;
+
+	enetc_clear_tx_bd(&txbd_tmp);
+	txbd_tmp.addr = cpu_to_le64(addr);
+	txbd_tmp.hdr_len = cpu_to_le16(lso->hdr_len);
+
+	/* The first BD needs frm_len and the offload flags set */
+	txbd_tmp.frm_len = cpu_to_le16(frm_len);
+	txbd_tmp.flags = flags;
+
+	txbd_tmp.l3_aux0 = FIELD_PREP(ENETC_TX_BD_L3_START, lso->l3_start);
+	/* l3_hdr_size in 32-bit (4-byte) units */
+	txbd_tmp.l3_aux1 = FIELD_PREP(ENETC_TX_BD_L3_HDR_LEN,
+				      lso->l3_hdr_len / 4);
+	if (lso->ipv6)
+		txbd_tmp.l3_aux1 |= ENETC_TX_BD_L3T;
+	else
+		txbd_tmp.l3_aux0 |= ENETC_TX_BD_IPCS;
+
+	txbd_tmp.l4_aux = FIELD_PREP(ENETC_TX_BD_L4T, lso->tcp ?
+				     ENETC_TXBD_L4T_TCP : ENETC_TXBD_L4T_UDP);
+
+	/* For the LSO header we do not set the dma address, since
+	 * we do not want it unmapped when we do cleanup. We still
+	 * set len so that we count the bytes sent.
+	 */
+	tx_swbd->len = lso->hdr_len;
+	tx_swbd->do_twostep_tstamp = false;
+	tx_swbd->check_wb = false;
+
+	/* Actually write the header in the BD */
+	*txbd = txbd_tmp;
+
+	/* Advance to the next BD, which is the extended BD */
+	enetc_bdr_idx_inc(tx_ring, i);
+	txbd = ENETC_TXBD(*tx_ring, *i);
+	tx_swbd = &tx_ring->tx_swbd[*i];
+	prefetchw(txbd);
+
+	enetc_clear_tx_bd(&txbd_tmp);
+	if (skb_vlan_tag_present(skb)) {
+		/* Set up the VLAN fields */
+		txbd_tmp.ext.vid = cpu_to_le16(skb_vlan_tag_get(skb));
+		txbd_tmp.ext.tpid = ENETC_TPID_8021Q;
+		e_flags = ENETC_TXBD_E_FLAGS_VLAN_INS;
+	}
+
+	/* Write the BD */
+	txbd_tmp.ext.e_flags = e_flags;
+	txbd_tmp.ext.lso_sg_size = cpu_to_le16(lso->lso_seg_size);
+	txbd_tmp.ext.frm_len_ext = cpu_to_le16(frm_len_ext);
+	*txbd = txbd_tmp;
+}
+
+static int enetc_lso_map_data(struct enetc_bdr *tx_ring, struct sk_buff *skb,
+			      int *i, struct enetc_lso_t *lso, int *count)
+{
+	union enetc_tx_bd txbd_tmp, *txbd = NULL;
+	struct enetc_tx_swbd *tx_swbd;
+	skb_frag_t *frag;
+	dma_addr_t dma;
+	u8 flags = 0;
+	int len, f;
+
+	len = skb_headlen(skb) - lso->hdr_len;
+	if (len > 0) {
+		dma = dma_map_single(tx_ring->dev, skb->data + lso->hdr_len,
+				     len, DMA_TO_DEVICE);
+		if (dma_mapping_error(tx_ring->dev, dma))
+			return -ENOMEM;
+
+		enetc_bdr_idx_inc(tx_ring, i);
+		txbd = ENETC_TXBD(*tx_ring, *i);
+		tx_swbd = &tx_ring->tx_swbd[*i];
+		prefetchw(txbd);
+		*count += 1;
+
+		enetc_clear_tx_bd(&txbd_tmp);
+		txbd_tmp.addr = cpu_to_le64(dma);
+		txbd_tmp.buf_len = cpu_to_le16(len);
+
+		tx_swbd->dma = dma;
+		tx_swbd->len = len;
+		tx_swbd->is_dma_page = 0;
+		tx_swbd->dir = DMA_TO_DEVICE;
+	}
+
+	frag = &skb_shinfo(skb)->frags[0];
+	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++, frag++) {
+		if (txbd)
+			*txbd = txbd_tmp;
+
+		len = skb_frag_size(frag);
+		dma = skb_frag_dma_map(tx_ring->dev, frag);
+		if (dma_mapping_error(tx_ring->dev, dma))
+			return -ENOMEM;
+
+		/* Get the next BD */
+		enetc_bdr_idx_inc(tx_ring, i);
+		txbd = ENETC_TXBD(*tx_ring, *i);
+		tx_swbd = &tx_ring->tx_swbd[*i];
+		prefetchw(txbd);
+		*count += 1;
+
+		enetc_clear_tx_bd(&txbd_tmp);
+		txbd_tmp.addr = cpu_to_le64(dma);
+		txbd_tmp.buf_len = cpu_to_le16(len);
+
+		tx_swbd->dma = dma;
+		tx_swbd->len = len;
+		tx_swbd->is_dma_page = 1;
+		tx_swbd->dir = DMA_TO_DEVICE;
+	}
+
+	/* The last BD needs the 'F' (final) bit set */
+	flags |= ENETC_TXBD_FLAGS_F;
+	txbd_tmp.flags = flags;
+	*txbd = txbd_tmp;
+
+	tx_swbd->is_eof = 1;
+	tx_swbd->skb = skb;
+
+	return 0;
+}
+
+static int enetc_lso_hw_offload(struct enetc_bdr *tx_ring, struct sk_buff *skb)
+{
+	struct enetc_tx_swbd *tx_swbd;
+	struct enetc_lso_t lso = {0};
+	int err, i, count = 0;
+
+	/* Initialize the LSO handler */
+	enetc_lso_start(skb, &lso);
+	i = tx_ring->next_to_use;
+
+	enetc_lso_map_hdr(tx_ring, skb, &i, &lso);
+	/* The first BD and the extended BD */
+	count += 2;
+
+	err = enetc_lso_map_data(tx_ring, skb, &i, &lso, &count);
+	if (err)
+		goto dma_err;
+
+	/* Go to the next BD */
+	enetc_bdr_idx_inc(tx_ring, &i);
+	tx_ring->next_to_use = i;
+	enetc_update_tx_ring_tail(tx_ring);
+
+	return count;
+
+dma_err:
+	do {
+		tx_swbd = &tx_ring->tx_swbd[i];
+		enetc_free_tx_frame(tx_ring, tx_swbd);
+		if (i == 0)
+			i = tx_ring->bd_count;
+		i--;
+	} while (--count);
+
+	return 0;
+}
+
 static int enetc_map_tx_tso_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb)
 {
 	struct enetc_ndev_priv *priv = netdev_priv(tx_ring->ndev);
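The {frm_len_ext, frm_len} split in enetc_lso_map_hdr() above is plain bit slicing: the 16 least significant bits of the total data length go into the first BD and the next 4 bits into the extension BD. A minimal standalone sketch of that arithmetic (illustrative user-space code, not part of the driver):

#include <stdint.h>
#include <stdio.h>

/* Mirrors the split done in enetc_lso_map_hdr(); the hardware
 * reassembles the total LSO data length from {frm_len_ext, frm_len}.
 */
static void split_total_len(uint32_t total_len,
			    uint16_t *frm_len, uint16_t *frm_len_ext)
{
	*frm_len = total_len & 0xffff;		/* 16 LSBs -> first BD */
	*frm_len_ext = (total_len >> 16) & 0xf;	/* next 4 bits -> ext BD */
}

int main(void)
{
	uint32_t total_len = 0x3fffa;	/* ~256KB of payload */
	uint16_t frm_len, frm_len_ext;

	split_total_len(total_len, &frm_len, &frm_len_ext);
	/* prints frm_len=0xfffa frm_len_ext=0x3 */
	printf("frm_len=0x%04x frm_len_ext=0x%x\n", frm_len, frm_len_ext);
	return 0;
}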
@@ -652,14 +876,26 @@ static netdev_tx_t enetc_start_xmit(struct sk_buff *skb,
 	tx_ring = priv->tx_ring[skb->queue_mapping];
 
 	if (skb_is_gso(skb)) {
-		if (enetc_bd_unused(tx_ring) < tso_count_descs(skb)) {
-			netif_stop_subqueue(ndev, tx_ring->index);
-			return NETDEV_TX_BUSY;
-		}
+		/* LSO data unit lengths of up to 256KB are supported */
+		if (priv->active_offloads & ENETC_F_LSO &&
+		    (skb->len - enetc_lso_get_hdr_len(skb)) <=
+		    ENETC_LSO_MAX_DATA_LEN) {
+			if (enetc_bd_unused(tx_ring) < enetc_lso_count_descs(skb)) {
+				netif_stop_subqueue(ndev, tx_ring->index);
+				return NETDEV_TX_BUSY;
+			}
 
-		enetc_lock_mdio();
-		count = enetc_map_tx_tso_buffs(tx_ring, skb);
-		enetc_unlock_mdio();
+			count = enetc_lso_hw_offload(tx_ring, skb);
+		} else {
+			if (enetc_bd_unused(tx_ring) < tso_count_descs(skb)) {
+				netif_stop_subqueue(ndev, tx_ring->index);
+				return NETDEV_TX_BUSY;
+			}
+
+			enetc_lock_mdio();
+			count = enetc_map_tx_tso_buffs(tx_ring, skb);
+			enetc_unlock_mdio();
+		}
 	} else {
 		if (unlikely(skb_shinfo(skb)->nr_frags > priv->max_frags))
 			if (unlikely(skb_linearize(skb)))
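The gate added above chooses between the two GSO paths: hardware LSO when the payload (skb->len minus the LSO header) fits in ENETC_LSO_MAX_DATA_LEN, software TSO otherwise; either path stops the queue when the ring lacks the worst-case number of BDs. A standalone sketch of that decision, with illustrative values standing in for a real skb:

#include <stdbool.h>
#include <stdio.h>

#define LSO_MAX_DATA_LEN	(256 * 1024)	/* ENETC_LSO_MAX_DATA_LEN */

/* Worst case from enetc_lso_count_descs(): header BD + extension BD +
 * linear-remainder BD + gap BD, plus one BD per page fragment.
 */
static int lso_count_descs(int nr_frags)
{
	return nr_frags + 4;
}

/* True when enetc_start_xmit() would take the hardware LSO path for a
 * GSO skb; larger payloads fall back to software TSO.
 */
static bool lso_path(int skb_len, int lso_hdr_len)
{
	return skb_len - lso_hdr_len <= LSO_MAX_DATA_LEN;
}

int main(void)
{
	/* hypothetical GSO skb: 64KB total, 54-byte header, 3 fragments */
	int skb_len = 64 * 1024, hdr_len = 54, nr_frags = 3;

	printf("hardware LSO: %d, worst-case BDs: %d\n",
	       lso_path(skb_len, hdr_len), lso_count_descs(nr_frags));
	return 0;
}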
@@ -1799,6 +2035,9 @@ void enetc_get_si_caps(struct enetc_si *si)
 		rss = enetc_rd(hw, ENETC_SIRSSCAPR);
 		si->num_rss = ENETC_SIRSSCAPR_GET_NUM_RSS(rss);
 	}
+
+	if (val & ENETC_SIPCAPR0_LSO)
+		si->hw_features |= ENETC_SI_F_LSO;
 }
 EXPORT_SYMBOL_GPL(enetc_get_si_caps);

@@ -2095,6 +2334,14 @@ static int enetc_setup_default_rss_table(struct enetc_si *si, int num_groups)
 	return 0;
 }
 
+static void enetc_set_lso_flags_mask(struct enetc_hw *hw)
+{
+	enetc_wr(hw, ENETC4_SILSOSFMR0,
+		 SILSOSFMR0_VAL_SET(ENETC4_TCP_NL_SEG_FLAGS_DMASK,
+				    ENETC4_TCP_NL_SEG_FLAGS_DMASK));
+	enetc_wr(hw, ENETC4_SILSOSFMR1, 0);
+}
+
 int enetc_configure_si(struct enetc_ndev_priv *priv)
 {
 	struct enetc_si *si = priv->si;
@@ -2108,6 +2355,9 @@ int enetc_configure_si(struct enetc_ndev_priv *priv)
 	/* enable SI */
 	enetc_wr(hw, ENETC_SIMR, ENETC_SIMR_EN);
 
+	if (si->hw_features & ENETC_SI_F_LSO)
+		enetc_set_lso_flags_mask(hw);
+
 	/* TODO: RSS support for i.MX95 will be supported later, and the
 	 * is_enetc_rev1() condition will be removed
 	 */
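On a DMA mapping error, enetc_lso_hw_offload() earlier in this file walks backwards from the last touched BD, freeing `count` software BDs and wrapping from index 0 back to the end of the ring. A small standalone sketch of that index walk (ring size and start index are illustrative):

#include <stdio.h>

#define BD_COUNT 8	/* illustrative ring size */

/* Mirrors the dma_err unwind in enetc_lso_hw_offload(): free the BD at
 * index i, then step backwards, wrapping 0 -> BD_COUNT - 1.
 */
static void unwind(int i, int count)
{
	do {
		printf("free swbd[%d]\n", i);
		if (i == 0)
			i = BD_COUNT;
		i--;
	} while (--count);
}

int main(void)
{
	unwind(2, 5);	/* frees indexes 2, 1, 0, 7, 6 */
	return 0;
}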

drivers/net/ethernet/freescale/enetc/enetc.h

Lines changed: 14 additions & 0 deletions
@@ -41,6 +41,18 @@ struct enetc_tx_swbd {
 	u8 qbv_en:1;
 };
 
+struct enetc_lso_t {
+	bool	ipv6;
+	bool	tcp;
+	u8	l3_hdr_len;
+	u8	hdr_len;	/* LSO header length */
+	u8	l3_start;
+	u16	lso_seg_size;
+	int	total_len;	/* total data length, not including the LSO header */
+};
+
+#define ENETC_LSO_MAX_DATA_LEN	SZ_256K
+
 #define ENETC_RX_MAXFRM_SIZE	ENETC_MAC_MAXFRM_SIZE
 #define ENETC_RXB_TRUESIZE	2048 /* PAGE_SIZE >> 1 */
 #define ENETC_RXB_PAD	NET_SKB_PAD /* add extra space if needed */

@@ -238,6 +250,7 @@ enum enetc_errata {
 #define ENETC_SI_F_PSFP	BIT(0)
 #define ENETC_SI_F_QBV	BIT(1)
 #define ENETC_SI_F_QBU	BIT(2)
+#define ENETC_SI_F_LSO	BIT(3)
 
 struct enetc_drvdata {
 	u32 pmac_offset; /* Only valid for PSI which supports 802.1Qbu */

@@ -351,6 +364,7 @@ enum enetc_active_offloads {
 	ENETC_F_QCI		= BIT(10),
 	ENETC_F_QBU		= BIT(11),
 	ENETC_F_TXCSUM		= BIT(12),
+	ENETC_F_LSO		= BIT(13),
 };
 
 enum enetc_flags_bit {

drivers/net/ethernet/freescale/enetc/enetc4_hw.h

Lines changed: 23 additions & 0 deletions
@@ -12,6 +12,29 @@
 #define NXP_ENETC_VENDOR_ID		0x1131
 #define NXP_ENETC_PF_DEV_ID		0xe101
 
+/********************** Station interface registers ***********************/
+/* Station interface LSO segmentation flag mask registers 0/1 */
+#define ENETC4_SILSOSFMR0		0x1300
+#define  SILSOSFMR0_TCP_MID_SEG		GENMASK(27, 16)
+#define  SILSOSFMR0_TCP_1ST_SEG		GENMASK(11, 0)
+#define  SILSOSFMR0_VAL_SET(first, mid)	(FIELD_PREP(SILSOSFMR0_TCP_MID_SEG, mid) | \
+					 FIELD_PREP(SILSOSFMR0_TCP_1ST_SEG, first))
+
+#define ENETC4_SILSOSFMR1		0x1304
+#define  SILSOSFMR1_TCP_LAST_SEG	GENMASK(11, 0)
+#define ENETC4_TCP_FLAGS_FIN		BIT(0)
+#define ENETC4_TCP_FLAGS_SYN		BIT(1)
+#define ENETC4_TCP_FLAGS_RST		BIT(2)
+#define ENETC4_TCP_FLAGS_PSH		BIT(3)
+#define ENETC4_TCP_FLAGS_ACK		BIT(4)
+#define ENETC4_TCP_FLAGS_URG		BIT(5)
+#define ENETC4_TCP_FLAGS_ECE		BIT(6)
+#define ENETC4_TCP_FLAGS_CWR		BIT(7)
+#define ENETC4_TCP_FLAGS_NS		BIT(8)
+/* According to tso_build_hdr(), clear all special flags for segments
+ * other than the last one.
+ */
+#define ENETC4_TCP_NL_SEG_FLAGS_DMASK	(ENETC4_TCP_FLAGS_FIN | \
+					 ENETC4_TCP_FLAGS_RST | ENETC4_TCP_FLAGS_PSH)
+
 /*************************** ENETC port registers **************************/
 #define ENETC4_ECAPR0			0x0
 #define  ECAPR0_RFS			BIT(2)
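As a worked example of the mask programming done by enetc_set_lso_flags_mask(): ENETC4_TCP_NL_SEG_FLAGS_DMASK is FIN | RST | PSH = 0xd, so SILSOSFMR0_VAL_SET(0xd, 0xd) yields 0x000d000d, placing the same mask in the first-segment field (bits 11:0) and the mid-segment field (bits 27:16). A standalone check of that arithmetic, using plain shifts in place of FIELD_PREP()/GENMASK():

#include <stdint.h>
#include <stdio.h>

/* TCP flag bits as encoded by the LSO flag mask registers */
#define TCP_FLAGS_FIN	(1u << 0)
#define TCP_FLAGS_RST	(1u << 2)
#define TCP_FLAGS_PSH	(1u << 3)

/* Flags cleared on every non-last segment, per tso_build_hdr() */
#define TCP_NL_SEG_FLAGS_DMASK	(TCP_FLAGS_FIN | TCP_FLAGS_RST | TCP_FLAGS_PSH)

/* Plain-shift equivalent of SILSOSFMR0_VAL_SET(): first-segment mask
 * in bits 11:0, mid-segment mask in bits 27:16.
 */
static uint32_t silsosfmr0_val(uint32_t first, uint32_t mid)
{
	return (mid << 16) | first;
}

int main(void)
{
	uint32_t val = silsosfmr0_val(TCP_NL_SEG_FLAGS_DMASK,
				      TCP_NL_SEG_FLAGS_DMASK);

	printf("SILSOSFMR0 = 0x%08x\n", val);	/* prints 0x000d000d */
	return 0;
}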
