Skip to content

Commit 9e272ed

Browse files
l00436852 authored and rleon committed
RDMA/hns: Disable local invalidate operation
When function reset and local invalidate are mixed, HNS RoCEE may hang. Before introducing the cause of the problem, two hardware internal concepts need to be introduced: 1. Execution queue: The queue of hardware execution instructions; function reset and local invalidate are both queued for execution in this queue. 2. Local queue: A queue that stores local operation instructions. The instructions in the local queue will be sent to the execution queue for execution. The instructions in the local queue will not be removed until their execution is completed. The reason for the problem is as follows: 1. There is a function reset instruction in the execution queue, which is currently being executed. A necessary condition for the successful execution of function reset is that the hardware pipeline must first drain the instructions that were not completed before it; 2. A local invalidate instruction at the head of the local queue is sent to the execution queue. Now there are two instructions in the execution queue: the first is the function reset instruction, and the second is the local invalidate instruction, and they will be executed in sequence; 3. The user has issued many local invalidate operations, causing the local queue to fill up; 4. The user issues yet another local operation command, which is queuing to enter the local queue. But the local queue is full and cannot receive new instructions, so this instruction is temporarily stored in the hardware pipeline; 5. The function reset is waiting for the earlier instructions in the hardware pipeline stage to be drained. However, the hardware pipeline stage still caches a local invalidate instruction, so the function reset cannot be completed, and the instructions after it cannot be executed. These factors together cause an execution logic deadlock in the hardware, and the consequence is that RoCEE stops responding entirely. Considering that the local operation command may potentially cause RoCEE to hang, this feature is no longer supported.
Fixes: e93df01 ("RDMA/hns: Support local invalidate for hip08 in kernel space") Signed-off-by: Yangyang Li <[email protected]> Signed-off-by: Wenpeng Liang <[email protected]> Signed-off-by: Haoyue Xu <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Leon Romanovsky <[email protected]>
1 parent b75927c commit 9e272ed

File tree

2 files changed

+0
-13
lines changed

2 files changed

+0
-13
lines changed

drivers/infiniband/hw/hns/hns_roce_hw_v2.c

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,6 @@ static const u32 hns_roce_op_code[] = {
118118
HR_OPC_MAP(ATOMIC_CMP_AND_SWP, ATOM_CMP_AND_SWAP),
119119
HR_OPC_MAP(ATOMIC_FETCH_AND_ADD, ATOM_FETCH_AND_ADD),
120120
HR_OPC_MAP(SEND_WITH_INV, SEND_WITH_INV),
121-
HR_OPC_MAP(LOCAL_INV, LOCAL_INV),
122121
HR_OPC_MAP(MASKED_ATOMIC_CMP_AND_SWP, ATOM_MSK_CMP_AND_SWAP),
123122
HR_OPC_MAP(MASKED_ATOMIC_FETCH_AND_ADD, ATOM_MSK_FETCH_AND_ADD),
124123
HR_OPC_MAP(REG_MR, FAST_REG_PMR),
@@ -559,9 +558,6 @@ static int set_rc_opcode(struct hns_roce_dev *hr_dev,
559558
else
560559
ret = -EOPNOTSUPP;
561560
break;
562-
case IB_WR_LOCAL_INV:
563-
hr_reg_enable(rc_sq_wqe, RC_SEND_WQE_SO);
564-
fallthrough;
565561
case IB_WR_SEND_WITH_INV:
566562
rc_sq_wqe->inv_key = cpu_to_le32(wr->ex.invalidate_rkey);
567563
break;
@@ -3222,7 +3218,6 @@ static int hns_roce_v2_write_mtpt(struct hns_roce_dev *hr_dev,
32223218

32233219
hr_reg_write(mpt_entry, MPT_ST, V2_MPT_ST_VALID);
32243220
hr_reg_write(mpt_entry, MPT_PD, mr->pd);
3225-
hr_reg_enable(mpt_entry, MPT_L_INV_EN);
32263221

32273222
hr_reg_write_bool(mpt_entry, MPT_BIND_EN,
32283223
mr->access & IB_ACCESS_MW_BIND);
@@ -3313,7 +3308,6 @@ static int hns_roce_v2_frmr_write_mtpt(struct hns_roce_dev *hr_dev,
33133308

33143309
hr_reg_enable(mpt_entry, MPT_RA_EN);
33153310
hr_reg_enable(mpt_entry, MPT_R_INV_EN);
3316-
hr_reg_enable(mpt_entry, MPT_L_INV_EN);
33173311

33183312
hr_reg_enable(mpt_entry, MPT_FRE);
33193313
hr_reg_clear(mpt_entry, MPT_MR_MW);
@@ -3345,7 +3339,6 @@ static int hns_roce_v2_mw_write_mtpt(void *mb_buf, struct hns_roce_mw *mw)
33453339
hr_reg_write(mpt_entry, MPT_PD, mw->pdn);
33463340

33473341
hr_reg_enable(mpt_entry, MPT_R_INV_EN);
3348-
hr_reg_enable(mpt_entry, MPT_L_INV_EN);
33493342
hr_reg_enable(mpt_entry, MPT_LW_EN);
33503343

33513344
hr_reg_enable(mpt_entry, MPT_MR_MW);
@@ -3794,7 +3787,6 @@ static const u32 wc_send_op_map[] = {
37943787
HR_WC_OP_MAP(RDMA_READ, RDMA_READ),
37953788
HR_WC_OP_MAP(RDMA_WRITE, RDMA_WRITE),
37963789
HR_WC_OP_MAP(RDMA_WRITE_WITH_IMM, RDMA_WRITE),
3797-
HR_WC_OP_MAP(LOCAL_INV, LOCAL_INV),
37983790
HR_WC_OP_MAP(ATOM_CMP_AND_SWAP, COMP_SWAP),
37993791
HR_WC_OP_MAP(ATOM_FETCH_AND_ADD, FETCH_ADD),
38003792
HR_WC_OP_MAP(ATOM_MSK_CMP_AND_SWAP, MASKED_COMP_SWAP),
@@ -3844,9 +3836,6 @@ static void fill_send_wc(struct ib_wc *wc, struct hns_roce_v2_cqe *cqe)
38443836
case HNS_ROCE_V2_WQE_OP_RDMA_WRITE_WITH_IMM:
38453837
wc->wc_flags |= IB_WC_WITH_IMM;
38463838
break;
3847-
case HNS_ROCE_V2_WQE_OP_LOCAL_INV:
3848-
wc->wc_flags |= IB_WC_WITH_INVALIDATE;
3849-
break;
38503839
case HNS_ROCE_V2_WQE_OP_ATOM_CMP_AND_SWAP:
38513840
case HNS_ROCE_V2_WQE_OP_ATOM_FETCH_AND_ADD:
38523841
case HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP:

drivers/infiniband/hw/hns/hns_roce_hw_v2.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -179,7 +179,6 @@ enum {
179179
HNS_ROCE_V2_WQE_OP_ATOM_MSK_CMP_AND_SWAP = 0x8,
180180
HNS_ROCE_V2_WQE_OP_ATOM_MSK_FETCH_AND_ADD = 0x9,
181181
HNS_ROCE_V2_WQE_OP_FAST_REG_PMR = 0xa,
182-
HNS_ROCE_V2_WQE_OP_LOCAL_INV = 0xb,
183182
HNS_ROCE_V2_WQE_OP_BIND_MW = 0xc,
184183
HNS_ROCE_V2_WQE_OP_MASK = 0x1f,
185184
};
@@ -915,7 +914,6 @@ struct hns_roce_v2_rc_send_wqe {
915914
#define RC_SEND_WQE_OWNER RC_SEND_WQE_FIELD_LOC(7, 7)
916915
#define RC_SEND_WQE_CQE RC_SEND_WQE_FIELD_LOC(8, 8)
917916
#define RC_SEND_WQE_FENCE RC_SEND_WQE_FIELD_LOC(9, 9)
918-
#define RC_SEND_WQE_SO RC_SEND_WQE_FIELD_LOC(10, 10)
919917
#define RC_SEND_WQE_SE RC_SEND_WQE_FIELD_LOC(11, 11)
920918
#define RC_SEND_WQE_INLINE RC_SEND_WQE_FIELD_LOC(12, 12)
921919
#define RC_SEND_WQE_WQE_INDEX RC_SEND_WQE_FIELD_LOC(30, 15)

0 commit comments

Comments
 (0)