Skip to content

Commit 3e93c0b

Browse files
committed
Merge branch 'erdma' into rdma.git for-next
Cheng Xu says: ==================== This v14 patch set introduces the Elastic RDMA Adapter (ERDMA) driver; ERDMA was released by Alibaba at Apsara Conference 2021. The PR for the ERDMA userspace provider has already been created [1]. ERDMA enables large-scale RDMA acceleration capability in the Alibaba ECS environment, initially offered on the g7re instance. It can significantly improve the efficiency of large-scale distributed computing and communication, and can expand dynamically with the cluster scale of Alibaba Cloud. ERDMA is an RDMA networking adapter based on the Alibaba MOC hardware. It works in the VPC network environment (overlay network) and uses the iWARP transport protocol. ERDMA supports reliable connection (RC). ERDMA also supports both kernel-space and user-space verbs. We already support HPC/AI applications with libfabric, NoF and some other internal verbs libraries, such as xrdma, epsl, etc. For an ECS instance with RDMA enabled, our MOC hardware generates two kinds of PCI devices: one for ERDMA, and one for the original net device (virtio-net). They are separate PCI devices. ==================== * branch 'erdma': RDMA/erdma: Add driver to kernel build environment RDMA/erdma: Add the ABI definitions RDMA/erdma: Add the erdma module RDMA/erdma: Add connection management (CM) support RDMA/erdma: Add verbs implementation RDMA/erdma: Add verbs header file RDMA/erdma: Add event queue implementation RDMA/erdma: Add cmdq implementation RDMA/erdma: Add main include file RDMA/erdma: Add the hardware related definitions RDMA: Add ERDMA to rdma_driver_id definition Signed-off-by: Jason Gunthorpe <[email protected]>
2 parents 0113780 + ca7fd6c commit 3e93c0b

File tree

18 files changed

+6478
-7
lines changed

18 files changed

+6478
-7
lines changed

MAINTAINERS

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -733,6 +733,14 @@ S: Maintained
733733
F: Documentation/i2c/busses/i2c-ali1563.rst
734734
F: drivers/i2c/busses/i2c-ali1563.c
735735

736+
ALIBABA ELASTIC RDMA DRIVER
737+
M: Cheng Xu <[email protected]>
738+
M: Kai Shen <[email protected]>
739+
740+
S: Supported
741+
F: drivers/infiniband/hw/erdma
742+
F: include/uapi/rdma/erdma-abi.h
743+
736744
ALIENWARE WMI DRIVER
737745
738746
S: Maintained

drivers/infiniband/Kconfig

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -78,20 +78,21 @@ config INFINIBAND_VIRT_DMA
7878
def_bool !HIGHMEM
7979

8080
if INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
81-
source "drivers/infiniband/hw/mthca/Kconfig"
82-
source "drivers/infiniband/hw/qib/Kconfig"
81+
source "drivers/infiniband/hw/bnxt_re/Kconfig"
8382
source "drivers/infiniband/hw/cxgb4/Kconfig"
8483
source "drivers/infiniband/hw/efa/Kconfig"
84+
source "drivers/infiniband/hw/erdma/Kconfig"
85+
source "drivers/infiniband/hw/hfi1/Kconfig"
86+
source "drivers/infiniband/hw/hns/Kconfig"
8587
source "drivers/infiniband/hw/irdma/Kconfig"
8688
source "drivers/infiniband/hw/mlx4/Kconfig"
8789
source "drivers/infiniband/hw/mlx5/Kconfig"
90+
source "drivers/infiniband/hw/mthca/Kconfig"
8891
source "drivers/infiniband/hw/ocrdma/Kconfig"
89-
source "drivers/infiniband/hw/vmw_pvrdma/Kconfig"
90-
source "drivers/infiniband/hw/usnic/Kconfig"
91-
source "drivers/infiniband/hw/hns/Kconfig"
92-
source "drivers/infiniband/hw/bnxt_re/Kconfig"
93-
source "drivers/infiniband/hw/hfi1/Kconfig"
9492
source "drivers/infiniband/hw/qedr/Kconfig"
93+
source "drivers/infiniband/hw/qib/Kconfig"
94+
source "drivers/infiniband/hw/usnic/Kconfig"
95+
source "drivers/infiniband/hw/vmw_pvrdma/Kconfig"
9596
source "drivers/infiniband/sw/rdmavt/Kconfig"
9697
source "drivers/infiniband/sw/rxe/Kconfig"
9798
source "drivers/infiniband/sw/siw/Kconfig"

drivers/infiniband/hw/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,4 @@ obj-$(CONFIG_INFINIBAND_HFI1) += hfi1/
1313
obj-$(CONFIG_INFINIBAND_HNS) += hns/
1414
obj-$(CONFIG_INFINIBAND_QEDR) += qedr/
1515
obj-$(CONFIG_INFINIBAND_BNXT_RE) += bnxt_re/
16+
obj-$(CONFIG_INFINIBAND_ERDMA) += erdma/

drivers/infiniband/hw/erdma/Kconfig

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# SPDX-License-Identifier: GPL-2.0-only
2+
config INFINIBAND_ERDMA
3+
tristate "Alibaba Elastic RDMA Adapter (ERDMA) support"
4+
depends on PCI_MSI && 64BIT
5+
depends on INFINIBAND_ADDR_TRANS
6+
depends on INFINIBAND_USER_ACCESS
7+
help
8+
This is a RDMA/iWarp driver for Alibaba Elastic RDMA Adapter(ERDMA),
9+
which supports RDMA features in Alibaba cloud environment.
10+
11+
To compile this driver as module, choose M here. The module will be
12+
called erdma.

drivers/infiniband/hw/erdma/Makefile

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# SPDX-License-Identifier: GPL-2.0
2+
obj-$(CONFIG_INFINIBAND_ERDMA) := erdma.o
3+
4+
erdma-y := erdma_cm.o erdma_main.o erdma_cmdq.o erdma_cq.o erdma_verbs.o erdma_qp.o erdma_eq.o

drivers/infiniband/hw/erdma/erdma.h

Lines changed: 287 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,287 @@
1+
/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
2+
3+
/* Authors: Cheng Xu <[email protected]> */
4+
/* Kai Shen <[email protected]> */
5+
/* Copyright (c) 2020-2022, Alibaba Group. */
6+
7+
#ifndef __ERDMA_H__
8+
#define __ERDMA_H__
9+
10+
#include <linux/bitfield.h>
11+
#include <linux/netdevice.h>
12+
#include <linux/xarray.h>
13+
#include <rdma/ib_verbs.h>
14+
15+
#include "erdma_hw.h"
16+
17+
#define DRV_MODULE_NAME "erdma"
18+
#define ERDMA_NODE_DESC "Elastic RDMA(iWARP) stack"
19+
20+
struct erdma_eq {
21+
void *qbuf;
22+
dma_addr_t qbuf_dma_addr;
23+
24+
spinlock_t lock;
25+
26+
u32 depth;
27+
28+
u16 ci;
29+
u16 rsvd;
30+
31+
atomic64_t event_num;
32+
atomic64_t notify_num;
33+
34+
u64 __iomem *db_addr;
35+
u64 *db_record;
36+
};
37+
38+
struct erdma_cmdq_sq {
39+
void *qbuf;
40+
dma_addr_t qbuf_dma_addr;
41+
42+
spinlock_t lock;
43+
44+
u32 depth;
45+
u16 ci;
46+
u16 pi;
47+
48+
u16 wqebb_cnt;
49+
50+
u64 *db_record;
51+
};
52+
53+
struct erdma_cmdq_cq {
54+
void *qbuf;
55+
dma_addr_t qbuf_dma_addr;
56+
57+
spinlock_t lock;
58+
59+
u32 depth;
60+
u32 ci;
61+
u32 cmdsn;
62+
63+
u64 *db_record;
64+
65+
atomic64_t armed_num;
66+
};
67+
68+
enum {
69+
ERDMA_CMD_STATUS_INIT,
70+
ERDMA_CMD_STATUS_ISSUED,
71+
ERDMA_CMD_STATUS_FINISHED,
72+
ERDMA_CMD_STATUS_TIMEOUT
73+
};
74+
75+
struct erdma_comp_wait {
76+
struct completion wait_event;
77+
u32 cmd_status;
78+
u32 ctx_id;
79+
u16 sq_pi;
80+
u8 comp_status;
81+
u8 rsvd;
82+
u32 comp_data[4];
83+
};
84+
85+
enum {
86+
ERDMA_CMDQ_STATE_OK_BIT = 0,
87+
ERDMA_CMDQ_STATE_TIMEOUT_BIT = 1,
88+
ERDMA_CMDQ_STATE_CTX_ERR_BIT = 2,
89+
};
90+
91+
#define ERDMA_CMDQ_TIMEOUT_MS 15000
92+
#define ERDMA_REG_ACCESS_WAIT_MS 20
93+
#define ERDMA_WAIT_DEV_DONE_CNT 500
94+
95+
struct erdma_cmdq {
96+
unsigned long *comp_wait_bitmap;
97+
struct erdma_comp_wait *wait_pool;
98+
spinlock_t lock;
99+
100+
bool use_event;
101+
102+
struct erdma_cmdq_sq sq;
103+
struct erdma_cmdq_cq cq;
104+
struct erdma_eq eq;
105+
106+
unsigned long state;
107+
108+
struct semaphore credits;
109+
u16 max_outstandings;
110+
};
111+
112+
#define COMPROMISE_CC ERDMA_CC_CUBIC
113+
enum erdma_cc_alg {
114+
ERDMA_CC_NEWRENO = 0,
115+
ERDMA_CC_CUBIC,
116+
ERDMA_CC_HPCC_RTT,
117+
ERDMA_CC_HPCC_ECN,
118+
ERDMA_CC_HPCC_INT,
119+
ERDMA_CC_METHODS_NUM
120+
};
121+
122+
struct erdma_devattr {
123+
u32 fw_version;
124+
125+
unsigned char peer_addr[ETH_ALEN];
126+
127+
int numa_node;
128+
enum erdma_cc_alg cc;
129+
u32 grp_num;
130+
u32 irq_num;
131+
132+
bool disable_dwqe;
133+
u16 dwqe_pages;
134+
u16 dwqe_entries;
135+
136+
u32 max_qp;
137+
u32 max_send_wr;
138+
u32 max_recv_wr;
139+
u32 max_ord;
140+
u32 max_ird;
141+
142+
u32 max_send_sge;
143+
u32 max_recv_sge;
144+
u32 max_sge_rd;
145+
u32 max_cq;
146+
u32 max_cqe;
147+
u64 max_mr_size;
148+
u32 max_mr;
149+
u32 max_pd;
150+
u32 max_mw;
151+
u32 local_dma_key;
152+
};
153+
154+
#define ERDMA_IRQNAME_SIZE 50
155+
156+
struct erdma_irq {
157+
char name[ERDMA_IRQNAME_SIZE];
158+
u32 msix_vector;
159+
cpumask_t affinity_hint_mask;
160+
};
161+
162+
struct erdma_eq_cb {
163+
bool ready;
164+
void *dev; /* All EQs use this field to get the erdma_dev struct */
165+
struct erdma_irq irq;
166+
struct erdma_eq eq;
167+
struct tasklet_struct tasklet;
168+
};
169+
170+
struct erdma_resource_cb {
171+
unsigned long *bitmap;
172+
spinlock_t lock;
173+
u32 next_alloc_idx;
174+
u32 max_cap;
175+
};
176+
177+
enum {
178+
ERDMA_RES_TYPE_PD = 0,
179+
ERDMA_RES_TYPE_STAG_IDX = 1,
180+
ERDMA_RES_CNT = 2,
181+
};
182+
183+
#define ERDMA_EXTRA_BUFFER_SIZE ERDMA_DB_SIZE
184+
#define WARPPED_BUFSIZE(size) ((size) + ERDMA_EXTRA_BUFFER_SIZE)
185+
186+
struct erdma_dev {
187+
struct ib_device ibdev;
188+
struct net_device *netdev;
189+
struct pci_dev *pdev;
190+
struct notifier_block netdev_nb;
191+
192+
resource_size_t func_bar_addr;
193+
resource_size_t func_bar_len;
194+
u8 __iomem *func_bar;
195+
196+
struct erdma_devattr attrs;
197+
/* physical port state (only one port per device) */
198+
enum ib_port_state state;
199+
200+
/* cmdq and aeq use the same msix vector */
201+
struct erdma_irq comm_irq;
202+
struct erdma_cmdq cmdq;
203+
struct erdma_eq aeq;
204+
struct erdma_eq_cb ceqs[ERDMA_NUM_MSIX_VEC - 1];
205+
206+
spinlock_t lock;
207+
struct erdma_resource_cb res_cb[ERDMA_RES_CNT];
208+
struct xarray qp_xa;
209+
struct xarray cq_xa;
210+
211+
u32 next_alloc_qpn;
212+
u32 next_alloc_cqn;
213+
214+
spinlock_t db_bitmap_lock;
215+
/* We provide max 64 uContexts that each has one SQ doorbell Page. */
216+
DECLARE_BITMAP(sdb_page, ERDMA_DWQE_TYPE0_CNT);
217+
/*
218+
* We provide max 496 uContexts that each has one SQ normal Db,
219+
* and one directWQE db.
220+
*/
221+
DECLARE_BITMAP(sdb_entry, ERDMA_DWQE_TYPE1_CNT);
222+
223+
atomic_t num_ctx;
224+
struct list_head cep_list;
225+
};
226+
227+
/*
 * Return a pointer to entry @idx inside the queue buffer @qbuf.
 *
 * @idx is first masked with (@depth - 1) and then shifted left by @shift to
 * form the byte offset into @qbuf, i.e. each entry occupies (1 << @shift)
 * bytes.
 * NOTE(review): the mask only wraps the index correctly if @depth is a power
 * of two -- confirm against the callers.
 */
static inline void *get_queue_entry(void *qbuf, u32 idx, u32 depth, u32 shift)
228+
{
229+
idx &= (depth - 1);
230+
231+
return qbuf + (idx << shift);
232+
}
233+
234+
/*
 * Convert a pointer to the ib_device embedded in a struct erdma_dev (its
 * "ibdev" member) back into a pointer to the containing erdma_dev.
 */
static inline struct erdma_dev *to_edev(struct ib_device *ibdev)
235+
{
236+
return container_of(ibdev, struct erdma_dev, ibdev);
237+
}
238+
239+
/*
 * Read a 32-bit value with readl() from the device's function BAR mapping
 * (dev->func_bar) at byte offset @reg.
 */
static inline u32 erdma_reg_read32(struct erdma_dev *dev, u32 reg)
240+
{
241+
return readl(dev->func_bar + reg);
242+
}
243+
244+
/*
 * Read a 64-bit value with readq() from the device's function BAR mapping
 * (dev->func_bar) at byte offset @reg.
 */
static inline u64 erdma_reg_read64(struct erdma_dev *dev, u32 reg)
245+
{
246+
return readq(dev->func_bar + reg);
247+
}
248+
249+
/*
 * Write the 32-bit @value with writel() to the device's function BAR mapping
 * (dev->func_bar) at byte offset @reg.
 */
static inline void erdma_reg_write32(struct erdma_dev *dev, u32 reg, u32 value)
250+
{
251+
writel(value, dev->func_bar + reg);
252+
}
253+
254+
/*
 * Write the 64-bit @value with writeq() to the device's function BAR mapping
 * (dev->func_bar) at byte offset @reg.
 */
static inline void erdma_reg_write64(struct erdma_dev *dev, u32 reg, u64 value)
255+
{
256+
writeq(value, dev->func_bar + reg);
257+
}
258+
259+
/*
 * Read the 32-bit register at byte offset @reg via erdma_reg_read32() and
 * return only the bit-field selected by @filed_mask, extracted with
 * FIELD_GET().
 *
 * NOTE(review): "filed" in the function and parameter names looks like a
 * misspelling of "field"; the names are kept as-is because callers outside
 * this header may reference them.
 * NOTE(review): FIELD_GET() normally expects a compile-time-constant mask, so
 * this presumably relies on being inlined with a constant @filed_mask --
 * confirm.
 */
static inline u32 erdma_reg_read32_filed(struct erdma_dev *dev, u32 reg,
260+
u32 filed_mask)
261+
{
262+
u32 val = erdma_reg_read32(dev, reg);
263+
264+
return FIELD_GET(filed_mask, val);
265+
}
266+
267+
int erdma_cmdq_init(struct erdma_dev *dev);
268+
void erdma_finish_cmdq_init(struct erdma_dev *dev);
269+
void erdma_cmdq_destroy(struct erdma_dev *dev);
270+
271+
void erdma_cmdq_build_reqhdr(u64 *hdr, u32 mod, u32 op);
272+
int erdma_post_cmd_wait(struct erdma_cmdq *cmdq, u64 *req, u32 req_size,
273+
u64 *resp0, u64 *resp1);
274+
void erdma_cmdq_completion_handler(struct erdma_cmdq *cmdq);
275+
276+
int erdma_ceqs_init(struct erdma_dev *dev);
277+
void erdma_ceqs_uninit(struct erdma_dev *dev);
278+
void notify_eq(struct erdma_eq *eq);
279+
void *get_next_valid_eqe(struct erdma_eq *eq);
280+
281+
int erdma_aeq_init(struct erdma_dev *dev);
282+
void erdma_aeq_destroy(struct erdma_dev *dev);
283+
284+
void erdma_aeq_event_handler(struct erdma_dev *dev);
285+
void erdma_ceq_completion_handler(struct erdma_eq_cb *ceq_cb);
286+
287+
#endif

0 commit comments

Comments
 (0)