Skip to content

Commit bbddfa2

Browse files
Chengchang Tang authored and rleon committed
RDMA/hns: Fix mbx timing out before CMD execution is completed
When a large number of tasks are issued, the hardware processes mailbox (mbx) commands more slowly. The firmware currently treats an mbx command as timed out after 30ms, and the driver uses the same 30ms limit. Because firmware scheduling in multi-function scenarios takes a certain amount of time, the driver can time out and report a failure before the firmware-side mbx timeout has actually expired. This patch introduces a mechanism that allows a different timeout to be set for each cmd, and extends the mbx timeout to 35ms.

Fixes: a04ff73 ("RDMA/hns: Add command queue support for hip08 RoCE driver")
Signed-off-by: Chengchang Tang <[email protected]>
Signed-off-by: Junxian Huang <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Leon Romanovsky <[email protected]>
1 parent 0b8e658 commit bbddfa2

File tree

2 files changed

+34
-7
lines changed

2 files changed

+34
-7
lines changed

drivers/infiniband/hw/hns/hns_roce_hw_v2.c

Lines changed: 28 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1275,12 +1275,38 @@ static int hns_roce_cmd_err_convert_errno(u16 desc_ret)
12751275
return -EIO;
12761276
}
12771277

1278+
/*
 * Pick the CSQ wait budget for a command.
 *
 * @opcode:     command opcode being sent
 * @tx_timeout: default budget to use when the opcode has no override
 *
 * Looks @opcode up in a small static override table and returns the
 * table's value on a match; otherwise returns @tx_timeout unchanged.
 * The only override present is HNS_ROCE_OPC_POST_MB, which maps to
 * HNS_ROCE_OPC_POST_MB_TIMEOUT.
 */
static u32 hns_roce_cmdq_tx_timeout(u16 opcode, u32 tx_timeout)
{
	static const struct hns_roce_cmdq_tx_timeout_map cmdq_tx_timeout[] = {
		{
			.opcode = HNS_ROCE_OPC_POST_MB,
			.tx_timeout = HNS_ROCE_OPC_POST_MB_TIMEOUT,
		},
	};
	const struct hns_roce_cmdq_tx_timeout_map *entry = cmdq_tx_timeout;
	const struct hns_roce_cmdq_tx_timeout_map *end =
		cmdq_tx_timeout + ARRAY_SIZE(cmdq_tx_timeout);

	/* Linear scan: the first entry whose opcode matches wins. */
	for (; entry != end; entry++) {
		if (entry->opcode == opcode)
			return entry->tx_timeout;
	}

	/* No per-opcode override; fall back to the caller's default. */
	return tx_timeout;
}
1291+
1292+
/*
 * Poll until the command send queue reports completion or the wait
 * budget for @opcode is used up.
 *
 * @hr_dev: device whose CSQ is polled
 * @opcode: opcode of the command in flight; selects the wait budget via
 *          hns_roce_cmdq_tx_timeout(), defaulting to priv->cmq.tx_timeout
 *
 * Nothing is returned, so the caller has to call hns_roce_cmq_csq_done()
 * again afterwards to tell completion apart from an exhausted budget.
 */
static void hns_roce_wait_csq_done(struct hns_roce_dev *hr_dev, u16 opcode)
{
	struct hns_roce_v2_priv *priv = hr_dev->priv;
	u32 budget = hns_roce_cmdq_tx_timeout(opcode, priv->cmq.tx_timeout);
	u32 polls = 0;

	for (;;) {
		if (hns_roce_cmq_csq_done(hr_dev))
			return;

		/* Not done yet: back off with udelay(1) before the next poll. */
		udelay(1);

		/* At least one poll is always made, even when budget is 0. */
		if (++polls >= budget)
			return;
	}
}
1304+
12781305
static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
12791306
struct hns_roce_cmq_desc *desc, int num)
12801307
{
12811308
struct hns_roce_v2_priv *priv = hr_dev->priv;
12821309
struct hns_roce_v2_cmq_ring *csq = &priv->cmq.csq;
1283-
u32 timeout = 0;
12841310
u16 desc_ret;
12851311
u32 tail;
12861312
int ret;
@@ -1301,12 +1327,7 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
13011327

13021328
atomic64_inc(&hr_dev->dfx_cnt[HNS_ROCE_DFX_CMDS_CNT]);
13031329

1304-
do {
1305-
if (hns_roce_cmq_csq_done(hr_dev))
1306-
break;
1307-
udelay(1);
1308-
} while (++timeout < priv->cmq.tx_timeout);
1309-
1330+
hns_roce_wait_csq_done(hr_dev, le16_to_cpu(desc->opcode));
13101331
if (hns_roce_cmq_csq_done(hr_dev)) {
13111332
ret = 0;
13121333
for (i = 0; i < num; i++) {

drivers/infiniband/hw/hns/hns_roce_hw_v2.h

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -224,6 +224,12 @@ enum hns_roce_opcode_type {
224224
HNS_SWITCH_PARAMETER_CFG = 0x1033,
225225
};
226226

227+
#define HNS_ROCE_OPC_POST_MB_TIMEOUT 35000
228+
struct hns_roce_cmdq_tx_timeout_map {
229+
u16 opcode;
230+
u32 tx_timeout;
231+
};
232+
227233
enum {
228234
TYPE_CRQ,
229235
TYPE_CSQ,

0 commit comments

Comments
 (0)