Skip to content

Commit b02fd3f

Browse files
chandramohan-akula authored and rleon committed
RDMA/bnxt_re: Report async events and errors
Report QP, SRQ and CQ async events and errors.

Signed-off-by: Chandramohan Akula <[email protected]>
Signed-off-by: Selvin Xavier <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Leon Romanovsky <[email protected]>
1 parent d60a779 commit b02fd3f

File tree

1 file changed

+156
-9
lines changed
  • drivers/infiniband/hw/bnxt_re

1 file changed

+156
-9
lines changed

drivers/infiniband/hw/bnxt_re/main.c

Lines changed: 156 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -970,6 +970,9 @@ static int bnxt_re_handle_unaffi_async_event(struct creq_func_event
970970
static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event,
971971
struct bnxt_re_qp *qp)
972972
{
973+
struct bnxt_re_srq *srq = container_of(qp->qplib_qp.srq, struct bnxt_re_srq,
974+
qplib_srq);
975+
struct creq_qp_error_notification *err_event;
973976
struct ib_event event = {};
974977
unsigned int flags;
975978

@@ -980,33 +983,177 @@ static int bnxt_re_handle_qp_async_event(struct creq_qp_event *qp_event,
980983
bnxt_re_unlock_cqs(qp, flags);
981984
}
982985

983-
if (qp->qplib_qp.srq) {
984-
event.device = &qp->rdev->ibdev;
985-
event.element.qp = &qp->ib_qp;
986-
event.event = IB_EVENT_QP_LAST_WQE_REACHED;
986+
event.device = &qp->rdev->ibdev;
987+
event.element.qp = &qp->ib_qp;
988+
event.event = IB_EVENT_QP_FATAL;
989+
990+
err_event = (struct creq_qp_error_notification *)qp_event;
991+
992+
switch (err_event->req_err_state_reason) {
993+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_OPCODE_ERROR:
994+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_TIMEOUT_RETRY_LIMIT:
995+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RNR_TIMEOUT_RETRY_LIMIT:
996+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_2:
997+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_3:
998+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_INVALID_READ_RESP:
999+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ILLEGAL_BIND:
1000+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ILLEGAL_FAST_REG:
1001+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ILLEGAL_INVALIDATE:
1002+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RETRAN_LOCAL_ERROR:
1003+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_AV_DOMAIN_ERROR:
1004+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_PROD_WQE_MSMTCH_ERROR:
1005+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_PSN_RANGE_CHECK_ERROR:
1006+
event.event = IB_EVENT_QP_ACCESS_ERR;
1007+
break;
1008+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_1:
1009+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_NAK_ARRIVAL_4:
1010+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_READ_RESP_LENGTH:
1011+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_WQE_FORMAT_ERROR:
1012+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_ORRQ_FORMAT_ERROR:
1013+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_INVALID_AVID_ERROR:
1014+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_SERV_TYPE_ERROR:
1015+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_INVALID_OP_ERROR:
1016+
event.event = IB_EVENT_QP_REQ_ERR;
1017+
break;
1018+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RX_MEMORY_ERROR:
1019+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_TX_MEMORY_ERROR:
1020+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_CMP_ERROR:
1021+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_CQ_LOAD_ERROR:
1022+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_TX_PCI_ERROR:
1023+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RX_PCI_ERROR:
1024+
case CREQ_QP_ERROR_NOTIFICATION_REQ_ERR_STATE_REASON_REQ_RETX_SETUP_ERROR:
1025+
event.event = IB_EVENT_QP_FATAL;
1026+
break;
1027+
1028+
default:
1029+
break;
9871030
}
9881031

989-
if (event.device && qp->ib_qp.event_handler)
1032+
switch (err_event->res_err_state_reason) {
1033+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_EXCEED_MAX:
1034+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_PAYLOAD_LENGTH_MISMATCH:
1035+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_PSN_SEQ_ERROR_RETRY_LIMIT:
1036+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_INVALID_R_KEY:
1037+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_DOMAIN_ERROR:
1038+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_NO_PERMISSION:
1039+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_RANGE_ERROR:
1040+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_INVALID_R_KEY:
1041+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_DOMAIN_ERROR:
1042+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_NO_PERMISSION:
1043+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_RANGE_ERROR:
1044+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_UNALIGN_ATOMIC:
1045+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_PSN_NOT_FOUND:
1046+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_INVALID_DUP_RKEY:
1047+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_IRRQ_FORMAT_ERROR:
1048+
event.event = IB_EVENT_QP_ACCESS_ERR;
1049+
break;
1050+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_EXCEEDS_WQE:
1051+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_WQE_FORMAT_ERROR:
1052+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_UNSUPPORTED_OPCODE:
1053+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_REM_INVALIDATE:
1054+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_OPCODE_ERROR:
1055+
event.event = IB_EVENT_QP_REQ_ERR;
1056+
break;
1057+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_IRRQ_OFLOW:
1058+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_CMP_ERROR:
1059+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_CQ_LOAD_ERROR:
1060+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_TX_PCI_ERROR:
1061+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_RX_PCI_ERROR:
1062+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_MEMORY_ERROR:
1063+
event.event = IB_EVENT_QP_FATAL;
1064+
break;
1065+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_SRQ_LOAD_ERROR:
1066+
case CREQ_QP_ERROR_NOTIFICATION_RES_ERR_STATE_REASON_RES_SRQ_ERROR:
1067+
if (srq)
1068+
event.event = IB_EVENT_SRQ_ERR;
1069+
break;
1070+
default:
1071+
break;
1072+
}
1073+
1074+
if (err_event->res_err_state_reason || err_event->req_err_state_reason) {
1075+
ibdev_dbg(&qp->rdev->ibdev,
1076+
"%s %s qp_id: %d cons (%d %d) req (%d %d) res (%d %d)\n",
1077+
__func__, rdma_is_kernel_res(&qp->ib_qp.res) ? "kernel" : "user",
1078+
qp->qplib_qp.id,
1079+
err_event->sq_cons_idx,
1080+
err_event->rq_cons_idx,
1081+
err_event->req_slow_path_state,
1082+
err_event->req_err_state_reason,
1083+
err_event->res_slow_path_state,
1084+
err_event->res_err_state_reason);
1085+
} else {
1086+
if (srq)
1087+
event.event = IB_EVENT_QP_LAST_WQE_REACHED;
1088+
}
1089+
1090+
if (event.event == IB_EVENT_SRQ_ERR && srq->ib_srq.event_handler) {
1091+
(*srq->ib_srq.event_handler)(&event,
1092+
srq->ib_srq.srq_context);
1093+
} else if (event.device && qp->ib_qp.event_handler) {
9901094
qp->ib_qp.event_handler(&event, qp->ib_qp.qp_context);
1095+
}
1096+
1097+
return 0;
1098+
}
1099+
1100+
static int bnxt_re_handle_cq_async_error(void *event, struct bnxt_re_cq *cq)
1101+
{
1102+
struct creq_cq_error_notification *cqerr;
1103+
struct ib_event ibevent = {};
1104+
1105+
cqerr = event;
1106+
switch (cqerr->cq_err_reason) {
1107+
case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_INVALID_ERROR:
1108+
case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_OVERFLOW_ERROR:
1109+
case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_REQ_CQ_LOAD_ERROR:
1110+
case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_INVALID_ERROR:
1111+
case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_OVERFLOW_ERROR:
1112+
case CREQ_CQ_ERROR_NOTIFICATION_CQ_ERR_REASON_RES_CQ_LOAD_ERROR:
1113+
ibevent.event = IB_EVENT_CQ_ERR;
1114+
default:
1115+
break;
1116+
}
1117+
1118+
if (ibevent.event == IB_EVENT_CQ_ERR && cq->ib_cq.event_handler) {
1119+
ibevent.element.cq = &cq->ib_cq;
1120+
ibevent.device = &cq->rdev->ibdev;
1121+
1122+
ibdev_dbg(&cq->rdev->ibdev,
1123+
"%s err reason %d\n", __func__, cqerr->cq_err_reason);
1124+
cq->ib_cq.event_handler(&ibevent, cq->ib_cq.cq_context);
1125+
}
9911126

9921127
return 0;
9931128
}
9941129

9951130
/*
 * Dispatch an affiliated async CREQ event to the handler matching the
 * object it refers to.
 *
 * affi_async: the CREQ entry; its ->event field selects the handler.
 * obj:        the qplib object the event is affiliated with.  It is
 *             treated as a struct bnxt_qplib_qp for QP error
 *             notifications and as a struct bnxt_qplib_cq for CQ error
 *             notifications.  A NULL obj is not an error.
 *
 * Returns 0 when obj is NULL, the selected handler's return value for a
 * QP or CQ error notification, and -EINVAL for any other event type.
 */
static int bnxt_re_handle_affi_async_event(struct creq_qp_event *affi_async,
9961131
void *obj)
9971132
{
1133+
struct bnxt_qplib_qp *lib_qp;
1134+
struct bnxt_qplib_cq *lib_cq;
1135+
struct bnxt_re_qp *qp;
1136+
struct bnxt_re_cq *cq;
9981137
int rc = 0;
9991138
u8 event;
10001139

10011140
/* NOTE(review): after this change obj may be a CQ as well as a QP. */
if (!obj)
10021141
return rc; /* QP was already dead, still return success */
10031142

10041143
event = affi_async->event;
1005-
if (event == CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION) {
1006-
struct bnxt_qplib_qp *lib_qp = obj;
1007-
struct bnxt_re_qp *qp = container_of(lib_qp, struct bnxt_re_qp,
1008-
qplib_qp);
1144+
switch (event) {
1145+
case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION:
1146+
/* obj is the qplib QP embedded in a bnxt_re_qp; recover the wrapper. */
lib_qp = obj;
1147+
qp = container_of(lib_qp, struct bnxt_re_qp, qplib_qp);
10091148
rc = bnxt_re_handle_qp_async_event(affi_async, qp);
1149+
break;
1150+
case CREQ_QP_EVENT_EVENT_CQ_ERROR_NOTIFICATION:
1151+
/* obj is the qplib CQ embedded in a bnxt_re_cq; recover the wrapper. */
lib_cq = obj;
1152+
cq = container_of(lib_cq, struct bnxt_re_cq, qplib_cq);
1153+
rc = bnxt_re_handle_cq_async_error(affi_async, cq);
1154+
break;
1155+
default:
1156+
/* Any other affiliated event type is rejected. */
rc = -EINVAL;
10101157
}
10111158
return rc;
10121159
}

0 commit comments

Comments
 (0)