Skip to content

Commit f0c875f

Browse files
kadesai16rleon
authored and committed
RDMA/bnxt_re: use firmware provided max request timeout
Firmware provides the max request timeout value as part of the hwrm_ver_get API. The driver gets the timeout from firmware and, if that interface is not available, falls back to a hardcoded timeout value. Also, add a helper function to check the FW status. Signed-off-by: Kashyap Desai <[email protected]> Signed-off-by: Selvin Xavier <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Leon Romanovsky <[email protected]>
1 parent a002785 commit f0c875f

File tree

4 files changed

+60
-12
lines changed

4 files changed

+60
-12
lines changed

drivers/infiniband/hw/bnxt_re/main.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1041,6 +1041,7 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
10411041
struct bnxt_en_dev *en_dev = rdev->en_dev;
10421042
struct hwrm_ver_get_output resp = {0};
10431043
struct hwrm_ver_get_input req = {0};
1044+
struct bnxt_qplib_chip_ctx *cctx;
10441045
struct bnxt_fw_msg fw_msg;
10451046
int rc = 0;
10461047

@@ -1058,11 +1059,18 @@ static void bnxt_re_query_hwrm_intf_version(struct bnxt_re_dev *rdev)
10581059
rc);
10591060
return;
10601061
}
1062+
1063+
cctx = rdev->chip_ctx;
10611064
rdev->qplib_ctx.hwrm_intf_ver =
10621065
(u64)le16_to_cpu(resp.hwrm_intf_major) << 48 |
10631066
(u64)le16_to_cpu(resp.hwrm_intf_minor) << 32 |
10641067
(u64)le16_to_cpu(resp.hwrm_intf_build) << 16 |
10651068
le16_to_cpu(resp.hwrm_intf_patch);
1069+
1070+
cctx->hwrm_cmd_max_timeout = le16_to_cpu(resp.max_req_timeout);
1071+
1072+
if (!cctx->hwrm_cmd_max_timeout)
1073+
cctx->hwrm_cmd_max_timeout = RCFW_FW_STALL_MAX_TIMEOUT;
10661074
}
10671075

10681076
static int bnxt_re_ib_init(struct bnxt_re_dev *rdev)

drivers/infiniband/hw/bnxt_re/qplib_rcfw.c

Lines changed: 48 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,41 @@ static int bnxt_qplib_map_rc(u8 opcode)
8989
}
9090
}
9191

92+
/**
93+
* bnxt_re_is_fw_stalled - Check firmware health
94+
* @rcfw - rcfw channel instance of rdev
95+
* @cookie - cookie to track the command
96+
* @opcode - rcfw submitted for given opcode
97+
* @cbit - bitmap entry of cookie
98+
*
99+
* If firmware has not responded to any rcfw command within
100+
* rcfw->max_timeout, consider firmware as stalled.
101+
*
102+
* Returns:
103+
* 0 if firmware is responding
104+
* -ENODEV if firmware is not responding
105+
*/
106+
static int bnxt_re_is_fw_stalled(struct bnxt_qplib_rcfw *rcfw,
107+
u16 cookie, u8 opcode, u16 cbit)
108+
{
109+
struct bnxt_qplib_cmdq_ctx *cmdq;
110+
111+
cmdq = &rcfw->cmdq;
112+
113+
if (time_after(jiffies, cmdq->last_seen +
114+
(rcfw->max_timeout * HZ))) {
115+
dev_warn_ratelimited(&rcfw->pdev->dev,
116+
"%s: FW STALL Detected. cmdq[%#x]=%#x waited (%d > %d) msec active %d ",
117+
__func__, cookie, opcode,
118+
jiffies_to_msecs(jiffies - cmdq->last_seen),
119+
rcfw->max_timeout * 1000,
120+
test_bit(cbit, cmdq->cmdq_bitmap));
121+
return -ENODEV;
122+
}
123+
124+
return 0;
125+
}
126+
92127
/**
93128
* __wait_for_resp - Don't hold the cpu context and wait for response
94129
* @rcfw - rcfw channel instance of rdev
@@ -105,6 +140,7 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie, u8 opcode)
105140
{
106141
struct bnxt_qplib_cmdq_ctx *cmdq;
107142
u16 cbit;
143+
int ret;
108144

109145
cmdq = &rcfw->cmdq;
110146
cbit = cookie % rcfw->cmdq_depth;
@@ -118,8 +154,8 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie, u8 opcode)
118154
wait_event_timeout(cmdq->waitq,
119155
!test_bit(cbit, cmdq->cmdq_bitmap) ||
120156
test_bit(ERR_DEVICE_DETACHED, &cmdq->flags),
121-
msecs_to_jiffies(RCFW_FW_STALL_TIMEOUT_SEC
122-
* 1000));
157+
msecs_to_jiffies(rcfw->max_timeout * 1000));
158+
123159
if (!test_bit(cbit, cmdq->cmdq_bitmap))
124160
return 0;
125161

@@ -128,10 +164,9 @@ static int __wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie, u8 opcode)
128164
if (!test_bit(cbit, cmdq->cmdq_bitmap))
129165
return 0;
130166

131-
/* Firmware stall is detected */
132-
if (time_after(jiffies, cmdq->last_seen +
133-
(RCFW_FW_STALL_TIMEOUT_SEC * HZ)))
134-
return -ENODEV;
167+
ret = bnxt_re_is_fw_stalled(rcfw, cookie, opcode, cbit);
168+
if (ret)
169+
return ret;
135170

136171
} while (true);
137172
};
@@ -352,6 +387,7 @@ static int __poll_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie,
352387
struct bnxt_qplib_cmdq_ctx *cmdq = &rcfw->cmdq;
353388
unsigned long issue_time;
354389
u16 cbit;
390+
int ret;
355391

356392
cbit = cookie % rcfw->cmdq_depth;
357393
issue_time = jiffies;
@@ -368,11 +404,10 @@ static int __poll_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie,
368404
if (!test_bit(cbit, cmdq->cmdq_bitmap))
369405
return 0;
370406
if (jiffies_to_msecs(jiffies - issue_time) >
371-
(RCFW_FW_STALL_TIMEOUT_SEC * 1000)) {
372-
/* Firmware stall is detected */
373-
if (time_after(jiffies, cmdq->last_seen +
374-
(RCFW_FW_STALL_TIMEOUT_SEC * HZ)))
375-
return -ENODEV;
407+
(rcfw->max_timeout * 1000)) {
408+
ret = bnxt_re_is_fw_stalled(rcfw, cookie, opcode, cbit);
409+
if (ret)
410+
return ret;
376411
}
377412
} while (true);
378413
};
@@ -951,6 +986,8 @@ int bnxt_qplib_alloc_rcfw_channel(struct bnxt_qplib_res *res,
951986
if (!rcfw->qp_tbl)
952987
goto fail;
953988

989+
rcfw->max_timeout = res->cctx->hwrm_cmd_max_timeout;
990+
954991
return 0;
955992

956993
fail:

drivers/infiniband/hw/bnxt_re/qplib_rcfw.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151

5252
#define RCFW_DBR_PCI_BAR_REGION 2
5353
#define RCFW_DBR_BASE_PAGE_SHIFT 12
54-
#define RCFW_FW_STALL_TIMEOUT_SEC 40
54+
#define RCFW_FW_STALL_MAX_TIMEOUT 40
5555

5656
/* Cmdq contains a fixed number of 16-Byte slots */
5757
struct bnxt_qplib_cmdqe {
@@ -227,6 +227,8 @@ struct bnxt_qplib_rcfw {
227227
atomic_t rcfw_intr_enabled;
228228
struct semaphore rcfw_inflight;
229229
atomic_t timeout_send;
230+
/* cached from chip cctx for quick reference in slow path */
231+
u16 max_timeout;
230232
};
231233

232234
struct bnxt_qplib_cmdqmsg {

drivers/infiniband/hw/bnxt_re/qplib_res.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ struct bnxt_qplib_chip_ctx {
5555
u8 chip_rev;
5656
u8 chip_metal;
5757
u16 hw_stats_size;
58+
u16 hwrm_cmd_max_timeout;
5859
struct bnxt_qplib_drv_modes modes;
5960
};
6061

0 commit comments

Comments
 (0)