Skip to content

Commit 4ff7d14

Browse files
Geetha sowjanya authored and davem330 committed
octeontx2-pf: Error handling support
HW reports many errors on the receive and transmit paths, such as incorrect queue configuration, packet transmission errors, LMTST instruction errors, transmit queue full, etc. These are reported via the QINT interrupt. Most of the errors are fatal and need reinitialization. Also added support to allocate receive buffers in non-atomic context when allocation fails in NAPI context. Signed-off-by: Geetha sowjanya <[email protected]> Signed-off-by: Aleksey Makarov <[email protected]> Signed-off-by: Sunil Goutham <[email protected]> Signed-off-by: David S. Miller <[email protected]>
1 parent 34bfe0e commit 4ff7d14

File tree

6 files changed

+255
-3
lines changed

6 files changed

+255
-3
lines changed

drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,13 @@ dma_addr_t otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
154154
return iova;
155155
}
156156

157+
/* ndo_tx_timeout callback: the network stack detected a stalled TX queue.
 * Most such stalls are fatal HW errors, so schedule the reset worker to
 * reinitialize the device (stop + reopen).  @txq identifies the stalled
 * queue but is unused here since the whole device is reset.
 */
void otx2_tx_timeout(struct net_device *netdev, unsigned int txq)
{
	struct otx2_nic *pfvf = netdev_priv(netdev);

	schedule_work(&pfvf->reset_task);
}
163+
157164
void otx2_get_mac_from_af(struct net_device *netdev)
158165
{
159166
struct otx2_nic *pfvf = netdev_priv(netdev);
@@ -362,6 +369,7 @@ static int otx2_rq_init(struct otx2_nic *pfvf, u16 qidx, u16 lpb_aura)
362369
aq->rq.lpb_sizem1 = (DMA_BUFFER_LEN(pfvf->rbsize) / 8) - 1;
363370
aq->rq.xqe_imm_size = 0; /* Copying of packet to CQE not needed */
364371
aq->rq.flow_tagw = 32; /* Copy full 32bit flow_tag to CQE header */
372+
aq->rq.qint_idx = 0;
365373
aq->rq.lpb_drop_ena = 1; /* Enable RED dropping for AURA */
366374
aq->rq.xqe_drop_ena = 1; /* Enable RED dropping for CQ/SSO */
367375
aq->rq.xqe_pass = RQ_PASS_LVL_CQ(pfvf->hw.rq_skid, qset->rqe_cnt);
@@ -424,6 +432,8 @@ static int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura)
424432
aq->sq.default_chan = pfvf->hw.tx_chan_base;
425433
aq->sq.sqe_stype = NIX_STYPE_STF; /* Cache SQB */
426434
aq->sq.sqb_aura = sqb_aura;
435+
aq->sq.sq_int_ena = NIX_SQINT_BITS;
436+
aq->sq.qint_idx = 0;
427437
/* Due pipelining impact minimum 2000 unused SQ CQE's
428438
* need to maintain to avoid CQ overflow.
429439
*/
@@ -470,6 +480,7 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx)
470480
pool_id = ((cq->cq_type == CQ_RX) &&
471481
(pfvf->hw.rqpool_cnt != pfvf->hw.rx_queues)) ? 0 : qidx;
472482
cq->rbpool = &qset->pool[pool_id];
483+
cq->refill_task_sched = false;
473484

474485
/* Get memory to put this msg */
475486
aq = otx2_mbox_alloc_msg_nix_aq_enq(&pfvf->mbox);
@@ -481,6 +492,8 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx)
481492
aq->cq.caching = 1;
482493
aq->cq.base = cq->cqe->iova;
483494
aq->cq.cint_idx = cq->cint_idx;
495+
aq->cq.cq_err_int_ena = NIX_CQERRINT_BITS;
496+
aq->cq.qint_idx = 0;
484497
aq->cq.avg_level = 255;
485498

486499
if (qidx < pfvf->hw.rx_queues) {
@@ -496,6 +509,45 @@ static int otx2_cq_init(struct otx2_nic *pfvf, u16 qidx)
496509
return otx2_sync_mbox_msg(&pfvf->mbox);
497510
}
498511

512+
/* Delayed-work handler that refills a CQ's receive buffer pool in
 * non-atomic (GFP_KERNEL) context, after NAPI-context (atomic)
 * allocation failed.  cq->pool_ptrs is the number of buffer pointers
 * still owed to the aura; the work item's array index identifies the CQ.
 */
static void otx2_pool_refill_task(struct work_struct *work)
{
	struct otx2_cq_queue *cq;
	struct otx2_pool *rbpool;
	struct refill_work *wrk;
	int qidx, free_ptrs = 0;
	struct otx2_nic *pfvf;
	s64 bufptr;

	wrk = container_of(work, struct refill_work, pool_refill_work.work);
	pfvf = wrk->pf;
	/* refill_wrk is a per-CQ array; pointer difference gives the CQ index */
	qidx = wrk - pfvf->refill_wrk;
	cq = &pfvf->qset.cq[qidx];
	rbpool = cq->rbpool;
	free_ptrs = cq->pool_ptrs;

	while (cq->pool_ptrs) {
		/* NOTE(review): bufptr <= 0 treats any IOVA with bit 63 set as
		 * an allocation failure — assumes the DMA address space never
		 * reaches that range; TODO confirm against otx2_alloc_rbuf().
		 */
		bufptr = otx2_alloc_rbuf(pfvf, rbpool, GFP_KERNEL);
		if (bufptr <= 0) {
			/* Reschedule this work if we failed to free at least
			 * half of the pointers; otherwise let NAPI retry for
			 * this RQ.
			 */
			if (!((free_ptrs - cq->pool_ptrs) > free_ptrs / 2)) {
				struct delayed_work *dwork;

				dwork = &wrk->pool_refill_work;
				schedule_delayed_work(dwork,
						      msecs_to_jiffies(100));
			} else {
				cq->refill_task_sched = false;
			}
			return;
		}
		/* Hand the buffer (offset past the headroom) back to the aura */
		otx2_aura_freeptr(pfvf, qidx, bufptr + OTX2_HEAD_ROOM);
		cq->pool_ptrs--;
	}
	/* All owed pointers refilled; allow the task to be scheduled again */
	cq->refill_task_sched = false;
}
550+
499551
int otx2_config_nix_queues(struct otx2_nic *pfvf)
500552
{
501553
int qidx, err;
@@ -525,6 +577,17 @@ int otx2_config_nix_queues(struct otx2_nic *pfvf)
525577
return err;
526578
}
527579

580+
/* Initialize work queue for receive buffer refill */
581+
pfvf->refill_wrk = devm_kcalloc(pfvf->dev, pfvf->qset.cq_cnt,
582+
sizeof(struct refill_work), GFP_KERNEL);
583+
if (!pfvf->refill_wrk)
584+
return -ENOMEM;
585+
586+
for (qidx = 0; qidx < pfvf->qset.cq_cnt; qidx++) {
587+
pfvf->refill_wrk[qidx].pf = pfvf;
588+
INIT_DELAYED_WORK(&pfvf->refill_wrk[qidx].pool_refill_work,
589+
otx2_pool_refill_task);
590+
}
528591
return 0;
529592
}
530593

drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,11 @@ struct otx2_hw {
129129
struct otx2_drv_stats drv_stats;
130130
};
131131

132+
/* Per-CQ deferred receive-buffer refill context; one entry per CQ is
 * allocated in pfvf->refill_wrk (the array index identifies the CQ).
 */
struct refill_work {
	struct delayed_work pool_refill_work;	/* runs otx2_pool_refill_task */
	struct otx2_nic *pf;			/* owning device, for container lookup */
};
136+
132137
struct otx2_nic {
133138
void __iomem *reg_base;
134139
struct net_device *netdev;
@@ -151,6 +156,10 @@ struct otx2_nic {
151156
u16 pcifunc; /* RVU PF_FUNC */
152157
struct cgx_link_user_info linfo;
153158

159+
u64 reset_count;
160+
struct work_struct reset_task;
161+
struct refill_work *refill_wrk;
162+
154163
/* Block address of NIX either BLKADDR_NIX0 or BLKADDR_NIX1 */
155164
int nix_blkaddr;
156165
};
@@ -435,6 +444,9 @@ otx2_mbox_up_handler_ ## _fn_name(struct otx2_nic *pfvf, \
435444
MBOX_UP_CGX_MESSAGES
436445
#undef M
437446

447+
/* Time to wait before watchdog kicks off */
448+
#define OTX2_TX_TIMEOUT (100 * HZ)
449+
438450
#define RVU_PFVF_PF_SHIFT 10
439451
#define RVU_PFVF_PF_MASK 0x3F
440452
#define RVU_PFVF_FUNC_SHIFT 0
@@ -472,6 +484,7 @@ void otx2_free_cints(struct otx2_nic *pfvf, int n);
472484
void otx2_set_cints_affinity(struct otx2_nic *pfvf);
473485
int otx2_set_mac_address(struct net_device *netdev, void *p);
474486
int otx2_hw_set_mtu(struct otx2_nic *pfvf, int mtu);
487+
void otx2_tx_timeout(struct net_device *netdev, unsigned int txq);
475488
void otx2_get_mac_from_af(struct net_device *netdev);
476489
void otx2_config_irq_coalescing(struct otx2_nic *pfvf, int qidx);
477490

drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c

Lines changed: 132 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -478,6 +478,85 @@ static int otx2_set_real_num_queues(struct net_device *netdev,
478478
return err;
479479
}
480480

481+
/* QINT (queue error interrupt) handler.  Polls every CQ and SQ via the
 * NIX_LF_*_OP_INT atomic read registers, logs the error cause(s), clears
 * the reported bits by writing them back, and schedules a full device
 * reset — these errors are treated as fatal.
 *
 * NOTE(review): (qidx << 44) selects the queue in the OP_INT atomic
 * read/write, and BIT_ULL(42) in the read result appears to flag that
 * the register read itself failed — per the OcteonTx2 HRM; confirm
 * against the NIX_LF_CQ_OP_INT/NIX_LF_SQ_OP_INT register definitions.
 */
static irqreturn_t otx2_q_intr_handler(int irq, void *data)
{
	struct otx2_nic *pf = data;
	u64 val, *ptr;
	u64 qidx = 0;

	/* CQ */
	for (qidx = 0; qidx < pf->qset.cq_cnt; qidx++) {
		ptr = otx2_get_regaddr(pf, NIX_LF_CQ_OP_INT);
		val = otx2_atomic64_add((qidx << 44), ptr);

		/* Write-1-to-clear the error bits we just observed */
		otx2_write64(pf, NIX_LF_CQ_OP_INT, (qidx << 44) |
			     (val & NIX_CQERRINT_BITS));
		if (!(val & (NIX_CQERRINT_BITS | BIT_ULL(42))))
			continue;

		if (val & BIT_ULL(42)) {
			netdev_err(pf->netdev, "CQ%lld: error reading NIX_LF_CQ_OP_INT, NIX_LF_ERR_INT 0x%llx\n",
				   qidx, otx2_read64(pf, NIX_LF_ERR_INT));
		} else {
			if (val & BIT_ULL(NIX_CQERRINT_DOOR_ERR))
				netdev_err(pf->netdev, "CQ%lld: Doorbell error",
					   qidx);
			if (val & BIT_ULL(NIX_CQERRINT_CQE_FAULT))
				netdev_err(pf->netdev, "CQ%lld: Memory fault on CQE write to LLC/DRAM",
					   qidx);
		}

		/* Fatal: reinitialize the whole device */
		schedule_work(&pf->reset_task);
	}

	/* SQ */
	for (qidx = 0; qidx < pf->hw.tx_queues; qidx++) {
		ptr = otx2_get_regaddr(pf, NIX_LF_SQ_OP_INT);
		val = otx2_atomic64_add((qidx << 44), ptr);
		otx2_write64(pf, NIX_LF_SQ_OP_INT, (qidx << 44) |
			     (val & NIX_SQINT_BITS));

		if (!(val & (NIX_SQINT_BITS | BIT_ULL(42))))
			continue;

		if (val & BIT_ULL(42)) {
			netdev_err(pf->netdev, "SQ%lld: error reading NIX_LF_SQ_OP_INT, NIX_LF_ERR_INT 0x%llx\n",
				   qidx, otx2_read64(pf, NIX_LF_ERR_INT));
		} else {
			/* For each cause, dump the matching debug register
			 * and clear its valid bit (BIT 44) so the next error
			 * is captured.
			 */
			if (val & BIT_ULL(NIX_SQINT_LMT_ERR)) {
				netdev_err(pf->netdev, "SQ%lld: LMT store error NIX_LF_SQ_OP_ERR_DBG:0x%llx",
					   qidx,
					   otx2_read64(pf,
						       NIX_LF_SQ_OP_ERR_DBG));
				otx2_write64(pf, NIX_LF_SQ_OP_ERR_DBG,
					     BIT_ULL(44));
			}
			if (val & BIT_ULL(NIX_SQINT_MNQ_ERR)) {
				netdev_err(pf->netdev, "SQ%lld: Meta-descriptor enqueue error NIX_LF_MNQ_ERR_DGB:0x%llx\n",
					   qidx,
					   otx2_read64(pf, NIX_LF_MNQ_ERR_DBG));
				otx2_write64(pf, NIX_LF_MNQ_ERR_DBG,
					     BIT_ULL(44));
			}
			if (val & BIT_ULL(NIX_SQINT_SEND_ERR)) {
				netdev_err(pf->netdev, "SQ%lld: Send error, NIX_LF_SEND_ERR_DBG 0x%llx",
					   qidx,
					   otx2_read64(pf,
						       NIX_LF_SEND_ERR_DBG));
				otx2_write64(pf, NIX_LF_SEND_ERR_DBG,
					     BIT_ULL(44));
			}
			if (val & BIT_ULL(NIX_SQINT_SQB_ALLOC_FAIL))
				netdev_err(pf->netdev, "SQ%lld: SQB allocation failed",
					   qidx);
		}

		schedule_work(&pf->reset_task);
	}

	return IRQ_HANDLED;
}
559+
481560
static irqreturn_t otx2_cq_intr_handler(int irq, void *cq_irq)
482561
{
483562
struct otx2_cq_poll *cq_poll = (struct otx2_cq_poll *)cq_irq;
@@ -759,6 +838,24 @@ int otx2_open(struct net_device *netdev)
759838
if (err)
760839
goto err_disable_napi;
761840

841+
/* Register Queue IRQ handlers */
842+
vec = pf->hw.nix_msixoff + NIX_LF_QINT_VEC_START;
843+
irq_name = &pf->hw.irq_name[vec * NAME_SIZE];
844+
845+
snprintf(irq_name, NAME_SIZE, "%s-qerr", pf->netdev->name);
846+
847+
err = request_irq(pci_irq_vector(pf->pdev, vec),
848+
otx2_q_intr_handler, 0, irq_name, pf);
849+
if (err) {
850+
dev_err(pf->dev,
851+
"RVUPF%d: IRQ registration failed for QERR\n",
852+
rvu_get_pf(pf->pcifunc));
853+
goto err_disable_napi;
854+
}
855+
856+
/* Enable QINT IRQ */
857+
otx2_write64(pf, NIX_LF_QINTX_ENA_W1S(0), BIT_ULL(0));
858+
762859
/* Register CQ IRQ handlers */
763860
vec = pf->hw.nix_msixoff + NIX_LF_CINT_VEC_START;
764861
for (qidx = 0; qidx < pf->hw.cint_cnt; qidx++) {
@@ -803,6 +900,11 @@ int otx2_open(struct net_device *netdev)
803900

804901
err_free_cints:
805902
otx2_free_cints(pf, qidx);
903+
vec = pci_irq_vector(pf->pdev,
904+
pf->hw.nix_msixoff + NIX_LF_QINT_VEC_START);
905+
otx2_write64(pf, NIX_LF_QINTX_ENA_W1C(0), BIT_ULL(0));
906+
synchronize_irq(vec);
907+
free_irq(vec, pf);
806908
err_disable_napi:
807909
otx2_disable_napi(pf);
808910
otx2_free_hw_resources(pf);
@@ -818,7 +920,7 @@ int otx2_stop(struct net_device *netdev)
818920
struct otx2_nic *pf = netdev_priv(netdev);
819921
struct otx2_cq_poll *cq_poll = NULL;
820922
struct otx2_qset *qset = &pf->qset;
821-
int qidx, vec;
923+
int qidx, vec, wrk;
822924

823925
netif_carrier_off(netdev);
824926
netif_tx_stop_all_queues(netdev);
@@ -830,6 +932,13 @@ int otx2_stop(struct net_device *netdev)
830932
/* First stop packet Rx/Tx */
831933
otx2_rxtx_enable(pf, false);
832934

935+
/* Cleanup Queue IRQ */
936+
vec = pci_irq_vector(pf->pdev,
937+
pf->hw.nix_msixoff + NIX_LF_QINT_VEC_START);
938+
otx2_write64(pf, NIX_LF_QINTX_ENA_W1C(0), BIT_ULL(0));
939+
synchronize_irq(vec);
940+
free_irq(vec, pf);
941+
833942
/* Cleanup CQ NAPI and IRQ */
834943
vec = pf->hw.nix_msixoff + NIX_LF_CINT_VEC_START;
835944
for (qidx = 0; qidx < pf->hw.cint_cnt; qidx++) {
@@ -852,6 +961,10 @@ int otx2_stop(struct net_device *netdev)
852961
for (qidx = 0; qidx < netdev->num_tx_queues; qidx++)
853962
netdev_tx_reset_queue(netdev_get_tx_queue(netdev, qidx));
854963

964+
for (wrk = 0; wrk < pf->qset.cq_cnt; wrk++)
965+
cancel_delayed_work_sync(&pf->refill_wrk[wrk].pool_refill_work);
966+
devm_kfree(pf->dev, pf->refill_wrk);
967+
855968
kfree(qset->sq);
856969
kfree(qset->cq);
857970
kfree(qset->napi);
@@ -931,6 +1044,19 @@ static int otx2_set_features(struct net_device *netdev,
9311044
return 0;
9321045
}
9331046

1047+
static void otx2_reset_task(struct work_struct *work)
1048+
{
1049+
struct otx2_nic *pf = container_of(work, struct otx2_nic, reset_task);
1050+
1051+
if (!netif_running(pf->netdev))
1052+
return;
1053+
1054+
otx2_stop(pf->netdev);
1055+
pf->reset_count++;
1056+
otx2_open(pf->netdev);
1057+
netif_trans_update(pf->netdev);
1058+
}
1059+
9341060
static const struct net_device_ops otx2_netdev_ops = {
9351061
.ndo_open = otx2_open,
9361062
.ndo_stop = otx2_stop,
@@ -939,6 +1065,7 @@ static const struct net_device_ops otx2_netdev_ops = {
9391065
.ndo_change_mtu = otx2_change_mtu,
9401066
.ndo_set_rx_mode = otx2_set_rx_mode,
9411067
.ndo_set_features = otx2_set_features,
1068+
.ndo_tx_timeout = otx2_tx_timeout,
9421069
};
9431070

9441071
static int otx2_check_pf_usable(struct otx2_nic *nic)
@@ -1115,12 +1242,16 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
11151242

11161243
netdev->hw_features |= NETIF_F_LOOPBACK | NETIF_F_RXALL;
11171244

1245+
netdev->watchdog_timeo = OTX2_TX_TIMEOUT;
1246+
11181247
netdev->netdev_ops = &otx2_netdev_ops;
11191248

11201249
/* MTU range: 64 - 9190 */
11211250
netdev->min_mtu = OTX2_MIN_MTU;
11221251
netdev->max_mtu = OTX2_MAX_MTU;
11231252

1253+
INIT_WORK(&pf->reset_task, otx2_reset_task);
1254+
11241255
err = register_netdev(netdev);
11251256
if (err) {
11261257
dev_err(dev, "Failed to register netdevice\n");

drivers/net/ethernet/marvell/octeontx2/nic/otx2_struct.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,4 +245,32 @@ struct nix_sqe_mem_s {
245245
u64 addr; /* W1 */
246246
};
247247

248+
/* NIX completion-queue error interrupt causes (bit positions in
 * NIX_LF_CQ_OP_INT / CQ context cq_err_int_ena).
 */
enum nix_cqerrint_e {
	NIX_CQERRINT_DOOR_ERR = 0,	/* doorbell error */
	NIX_CQERRINT_WR_FULL = 1,	/* CQE write dropped, CQ full */
	NIX_CQERRINT_CQE_FAULT = 2,	/* memory fault on CQE write */
};

/* CQ error causes enabled/handled by the driver; WR_FULL is not included */
#define NIX_CQERRINT_BITS (BIT_ULL(NIX_CQERRINT_DOOR_ERR) | \
			   BIT_ULL(NIX_CQERRINT_CQE_FAULT))

/* NIX receive-queue interrupt causes */
enum nix_rqint_e {
	NIX_RQINT_DROP = 0,	/* packet dropped */
	NIX_RQINT_RED = 1,	/* RED (random early drop) */
};

#define NIX_RQINT_BITS (BIT_ULL(NIX_RQINT_DROP) | BIT_ULL(NIX_RQINT_RED))

/* NIX send-queue interrupt causes (bit positions in NIX_LF_SQ_OP_INT /
 * SQ context sq_int_ena)
 */
enum nix_sqint_e {
	NIX_SQINT_LMT_ERR = 0,		/* LMTST instruction error */
	NIX_SQINT_MNQ_ERR = 1,		/* meta-descriptor enqueue error */
	NIX_SQINT_SEND_ERR = 2,		/* packet send error */
	NIX_SQINT_SQB_ALLOC_FAIL = 3,	/* SQ buffer allocation failed */
};

/* All SQ error causes are enabled and treated as fatal */
#define NIX_SQINT_BITS (BIT_ULL(NIX_SQINT_LMT_ERR) | \
			BIT_ULL(NIX_SQINT_MNQ_ERR) | \
			BIT_ULL(NIX_SQINT_SEND_ERR) | \
			BIT_ULL(NIX_SQINT_SQB_ALLOC_FAIL))
275+
248276
#endif /* OTX2_STRUCT_H */

0 commit comments

Comments
 (0)