Skip to content

Commit f7e97ce

Browse files
committed
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe: "Many small changes across the subsystem, some highlights: - Usual driver cleanups in qedr, siw, erdma, hfi1, mlx4/5, irdma, mthca, hns, and bnxt_re - siw now works over tunnel and other netdevs with a MAC address by removing assumptions about a MAC/GID from the connection manager - "Doorbell Pacing" for bnxt_re - this is a best effort scheme to allow userspace to slow down the doorbell rings if the HW gets full - irdma egress VLAN priority, better QP/WQ sizing - rxe bug fixes in queue draining and srq resizing - Support more ethernet speed options in the core layer - DMABUF support for bnxt_re - Multi-stage MTT support for erdma to allow much bigger MR registrations - An irdma fix with a CVE that came in too late to go to -rc, missing bounds checking for 0 length MRs" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (87 commits) IB/hfi1: Reduce printing of errors during driver shut down RDMA/hfi1: Move user SDMA system memory pinning code to its own file RDMA/hfi1: Use list_for_each_entry() helper RDMA/mlx5: Fix trailing */ formatting in block comment RDMA/rxe: Fix redundant break statement in switch-case. RDMA/efa: Fix wrong resources deallocation order RDMA/siw: Call llist_reverse_order in siw_run_sq RDMA/siw: Correct wrong debug message RDMA/siw: Balance the reference of cep->kref in the error path Revert "IB/isert: Fix incorrect release of isert connection" RDMA/bnxt_re: Fix kernel doc errors RDMA/irdma: Prevent zero-length STAG registration RDMA/erdma: Implement hierarchical MTT RDMA/erdma: Refactor the storage structure of MTT entries RDMA/erdma: Renaming variable names and field names of struct erdma_mem RDMA/hns: Support hns HW stats RDMA/hns: Dump whole QP/CQ/MR resource in raw RDMA/irdma: Add missing kernel-doc in irdma_setup_umode_qp() RDMA/mlx4: Copy union directly RDMA/irdma: Drop unused kernel push code ...
2 parents 2fcbb03 + f5acc36 commit f7e97ce

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

94 files changed

+2892
-1670
lines changed

drivers/infiniband/core/cache.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1457,6 +1457,17 @@ static int config_non_roce_gid_cache(struct ib_device *device,
14571457
i);
14581458
goto err;
14591459
}
1460+
1461+
if (rdma_protocol_iwarp(device, port)) {
1462+
struct net_device *ndev;
1463+
1464+
ndev = ib_device_get_netdev(device, port);
1465+
if (!ndev)
1466+
continue;
1467+
RCU_INIT_POINTER(gid_attr.ndev, ndev);
1468+
dev_put(ndev);
1469+
}
1470+
14601471
gid_attr.index = i;
14611472
tprops->subnet_prefix =
14621473
be64_to_cpu(gid_attr.gid.global.subnet_prefix);

drivers/infiniband/core/cma.c

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -686,30 +686,52 @@ cma_validate_port(struct ib_device *device, u32 port,
686686
struct rdma_id_private *id_priv)
687687
{
688688
struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
689+
const struct ib_gid_attr *sgid_attr = ERR_PTR(-ENODEV);
689690
int bound_if_index = dev_addr->bound_dev_if;
690-
const struct ib_gid_attr *sgid_attr;
691691
int dev_type = dev_addr->dev_type;
692692
struct net_device *ndev = NULL;
693693

694694
if (!rdma_dev_access_netns(device, id_priv->id.route.addr.dev_addr.net))
695-
return ERR_PTR(-ENODEV);
695+
goto out;
696696

697697
if ((dev_type == ARPHRD_INFINIBAND) && !rdma_protocol_ib(device, port))
698-
return ERR_PTR(-ENODEV);
698+
goto out;
699699

700700
if ((dev_type != ARPHRD_INFINIBAND) && rdma_protocol_ib(device, port))
701-
return ERR_PTR(-ENODEV);
701+
goto out;
702+
703+
/*
704+
* For drivers that do not associate more than one net device with
705+
* their gid tables, such as iWARP drivers, it is sufficient to
706+
* return the first table entry.
707+
*
708+
* Other driver classes might be included in the future.
709+
*/
710+
if (rdma_protocol_iwarp(device, port)) {
711+
sgid_attr = rdma_get_gid_attr(device, port, 0);
712+
if (IS_ERR(sgid_attr))
713+
goto out;
714+
715+
rcu_read_lock();
716+
ndev = rcu_dereference(sgid_attr->ndev);
717+
if (!net_eq(dev_net(ndev), dev_addr->net) ||
718+
ndev->ifindex != bound_if_index)
719+
sgid_attr = ERR_PTR(-ENODEV);
720+
rcu_read_unlock();
721+
goto out;
722+
}
702723

703724
if (dev_type == ARPHRD_ETHER && rdma_protocol_roce(device, port)) {
704725
ndev = dev_get_by_index(dev_addr->net, bound_if_index);
705726
if (!ndev)
706-
return ERR_PTR(-ENODEV);
727+
goto out;
707728
} else {
708729
gid_type = IB_GID_TYPE_IB;
709730
}
710731

711732
sgid_attr = rdma_find_gid_by_port(device, gid, gid_type, port, ndev);
712733
dev_put(ndev);
734+
out:
713735
return sgid_attr;
714736
}
715737

drivers/infiniband/core/iwpm_util.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -307,7 +307,7 @@ int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr,
307307
struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq,
308308
u8 nl_client, gfp_t gfp)
309309
{
310-
struct iwpm_nlmsg_request *nlmsg_request = NULL;
310+
struct iwpm_nlmsg_request *nlmsg_request;
311311
unsigned long flags;
312312

313313
nlmsg_request = kzalloc(sizeof(struct iwpm_nlmsg_request), gfp);

drivers/infiniband/core/netlink.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ static bool is_nl_msg_valid(unsigned int type, unsigned int op)
7575
if (type >= RDMA_NL_NUM_CLIENTS)
7676
return false;
7777

78-
return (op < max_num_ops[type]) ? true : false;
78+
return op < max_num_ops[type];
7979
}
8080

8181
static const struct rdma_nl_cbs *

drivers/infiniband/core/uverbs_main.c

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -72,12 +72,23 @@ enum {
7272
#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR)
7373

7474
static dev_t dynamic_uverbs_dev;
75-
static struct class *uverbs_class;
7675

7776
static DEFINE_IDA(uverbs_ida);
7877
static int ib_uverbs_add_one(struct ib_device *device);
7978
static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
8079

80+
static char *uverbs_devnode(const struct device *dev, umode_t *mode)
81+
{
82+
if (mode)
83+
*mode = 0666;
84+
return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
85+
}
86+
87+
static const struct class uverbs_class = {
88+
.name = "infiniband_verbs",
89+
.devnode = uverbs_devnode,
90+
};
91+
8192
/*
8293
* Must be called with the ufile->device->disassociate_srcu held, and the lock
8394
* must be held until use of the ucontext is finished.
@@ -1117,7 +1128,7 @@ static int ib_uverbs_add_one(struct ib_device *device)
11171128
}
11181129

11191130
device_initialize(&uverbs_dev->dev);
1120-
uverbs_dev->dev.class = uverbs_class;
1131+
uverbs_dev->dev.class = &uverbs_class;
11211132
uverbs_dev->dev.parent = device->dev.parent;
11221133
uverbs_dev->dev.release = ib_uverbs_release_dev;
11231134
uverbs_dev->groups[0] = &dev_attr_group;
@@ -1235,13 +1246,6 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
12351246
put_device(&uverbs_dev->dev);
12361247
}
12371248

1238-
static char *uverbs_devnode(const struct device *dev, umode_t *mode)
1239-
{
1240-
if (mode)
1241-
*mode = 0666;
1242-
return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
1243-
}
1244-
12451249
static int __init ib_uverbs_init(void)
12461250
{
12471251
int ret;
@@ -1262,16 +1266,13 @@ static int __init ib_uverbs_init(void)
12621266
goto out_alloc;
12631267
}
12641268

1265-
uverbs_class = class_create("infiniband_verbs");
1266-
if (IS_ERR(uverbs_class)) {
1267-
ret = PTR_ERR(uverbs_class);
1269+
ret = class_register(&uverbs_class);
1270+
if (ret) {
12681271
pr_err("user_verbs: couldn't create class infiniband_verbs\n");
12691272
goto out_chrdev;
12701273
}
12711274

1272-
uverbs_class->devnode = uverbs_devnode;
1273-
1274-
ret = class_create_file(uverbs_class, &class_attr_abi_version.attr);
1275+
ret = class_create_file(&uverbs_class, &class_attr_abi_version.attr);
12751276
if (ret) {
12761277
pr_err("user_verbs: couldn't create abi_version attribute\n");
12771278
goto out_class;
@@ -1286,7 +1287,7 @@ static int __init ib_uverbs_init(void)
12861287
return 0;
12871288

12881289
out_class:
1289-
class_destroy(uverbs_class);
1290+
class_unregister(&uverbs_class);
12901291

12911292
out_chrdev:
12921293
unregister_chrdev_region(dynamic_uverbs_dev,
@@ -1303,7 +1304,7 @@ static int __init ib_uverbs_init(void)
13031304
static void __exit ib_uverbs_cleanup(void)
13041305
{
13051306
ib_unregister_client(&uverbs_client);
1306-
class_destroy(uverbs_class);
1307+
class_unregister(&uverbs_class);
13071308
unregister_chrdev_region(IB_UVERBS_BASE_DEV,
13081309
IB_UVERBS_NUM_FIXED_MINOR);
13091310
unregister_chrdev_region(dynamic_uverbs_dev,

drivers/infiniband/core/uverbs_std_types_counters.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ static int UVERBS_HANDLER(UVERBS_METHOD_COUNTERS_READ)(
107107
return ret;
108108

109109
uattr = uverbs_attr_get(attrs, UVERBS_ATTR_READ_COUNTERS_BUFF);
110+
if (IS_ERR(uattr))
111+
return PTR_ERR(uattr);
110112
read_attr.ncounters = uattr->ptr_attr.len / sizeof(u64);
111113
read_attr.counters_buff = uverbs_zalloc(
112114
attrs, array_size(read_attr.ncounters, sizeof(u64)));

drivers/infiniband/core/verbs.c

Lines changed: 88 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1880,6 +1880,89 @@ int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
18801880
}
18811881
EXPORT_SYMBOL(ib_modify_qp_with_udata);
18821882

1883+
static void ib_get_width_and_speed(u32 netdev_speed, u32 lanes,
1884+
u16 *speed, u8 *width)
1885+
{
1886+
if (!lanes) {
1887+
if (netdev_speed <= SPEED_1000) {
1888+
*width = IB_WIDTH_1X;
1889+
*speed = IB_SPEED_SDR;
1890+
} else if (netdev_speed <= SPEED_10000) {
1891+
*width = IB_WIDTH_1X;
1892+
*speed = IB_SPEED_FDR10;
1893+
} else if (netdev_speed <= SPEED_20000) {
1894+
*width = IB_WIDTH_4X;
1895+
*speed = IB_SPEED_DDR;
1896+
} else if (netdev_speed <= SPEED_25000) {
1897+
*width = IB_WIDTH_1X;
1898+
*speed = IB_SPEED_EDR;
1899+
} else if (netdev_speed <= SPEED_40000) {
1900+
*width = IB_WIDTH_4X;
1901+
*speed = IB_SPEED_FDR10;
1902+
} else if (netdev_speed <= SPEED_50000) {
1903+
*width = IB_WIDTH_2X;
1904+
*speed = IB_SPEED_EDR;
1905+
} else if (netdev_speed <= SPEED_100000) {
1906+
*width = IB_WIDTH_4X;
1907+
*speed = IB_SPEED_EDR;
1908+
} else if (netdev_speed <= SPEED_200000) {
1909+
*width = IB_WIDTH_4X;
1910+
*speed = IB_SPEED_HDR;
1911+
} else {
1912+
*width = IB_WIDTH_4X;
1913+
*speed = IB_SPEED_NDR;
1914+
}
1915+
1916+
return;
1917+
}
1918+
1919+
switch (lanes) {
1920+
case 1:
1921+
*width = IB_WIDTH_1X;
1922+
break;
1923+
case 2:
1924+
*width = IB_WIDTH_2X;
1925+
break;
1926+
case 4:
1927+
*width = IB_WIDTH_4X;
1928+
break;
1929+
case 8:
1930+
*width = IB_WIDTH_8X;
1931+
break;
1932+
case 12:
1933+
*width = IB_WIDTH_12X;
1934+
break;
1935+
default:
1936+
*width = IB_WIDTH_1X;
1937+
}
1938+
1939+
switch (netdev_speed / lanes) {
1940+
case SPEED_2500:
1941+
*speed = IB_SPEED_SDR;
1942+
break;
1943+
case SPEED_5000:
1944+
*speed = IB_SPEED_DDR;
1945+
break;
1946+
case SPEED_10000:
1947+
*speed = IB_SPEED_FDR10;
1948+
break;
1949+
case SPEED_14000:
1950+
*speed = IB_SPEED_FDR;
1951+
break;
1952+
case SPEED_25000:
1953+
*speed = IB_SPEED_EDR;
1954+
break;
1955+
case SPEED_50000:
1956+
*speed = IB_SPEED_HDR;
1957+
break;
1958+
case SPEED_100000:
1959+
*speed = IB_SPEED_NDR;
1960+
break;
1961+
default:
1962+
*speed = IB_SPEED_SDR;
1963+
}
1964+
}
1965+
18831966
int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width)
18841967
{
18851968
int rc;
@@ -1904,29 +1987,13 @@ int ib_get_eth_speed(struct ib_device *dev, u32 port_num, u16 *speed, u8 *width)
19041987
netdev_speed = lksettings.base.speed;
19051988
} else {
19061989
netdev_speed = SPEED_1000;
1907-
pr_warn("%s speed is unknown, defaulting to %u\n", netdev->name,
1908-
netdev_speed);
1990+
if (rc)
1991+
pr_warn("%s speed is unknown, defaulting to %u\n",
1992+
netdev->name, netdev_speed);
19091993
}
19101994

1911-
if (netdev_speed <= SPEED_1000) {
1912-
*width = IB_WIDTH_1X;
1913-
*speed = IB_SPEED_SDR;
1914-
} else if (netdev_speed <= SPEED_10000) {
1915-
*width = IB_WIDTH_1X;
1916-
*speed = IB_SPEED_FDR10;
1917-
} else if (netdev_speed <= SPEED_20000) {
1918-
*width = IB_WIDTH_4X;
1919-
*speed = IB_SPEED_DDR;
1920-
} else if (netdev_speed <= SPEED_25000) {
1921-
*width = IB_WIDTH_1X;
1922-
*speed = IB_SPEED_EDR;
1923-
} else if (netdev_speed <= SPEED_40000) {
1924-
*width = IB_WIDTH_4X;
1925-
*speed = IB_SPEED_FDR10;
1926-
} else {
1927-
*width = IB_WIDTH_4X;
1928-
*speed = IB_SPEED_EDR;
1929-
}
1995+
ib_get_width_and_speed(netdev_speed, lksettings.lanes,
1996+
speed, width);
19301997

19311998
return 0;
19321999
}

drivers/infiniband/hw/bnxt_re/bnxt_re.h

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -112,12 +112,34 @@ struct bnxt_re_gsi_context {
112112
#define BNXT_RE_NQ_IDX 1
113113
#define BNXT_RE_GEN_P5_MAX_VF 64
114114

115+
struct bnxt_re_pacing {
116+
u64 dbr_db_fifo_reg_off;
117+
void *dbr_page;
118+
u64 dbr_bar_addr;
119+
u32 pacing_algo_th;
120+
u32 do_pacing_save;
121+
u32 dbq_pacing_time; /* ms */
122+
u32 dbr_def_do_pacing;
123+
bool dbr_pacing;
124+
struct mutex dbq_lock; /* synchronize db pacing algo */
125+
};
126+
127+
#define BNXT_RE_MAX_DBR_DO_PACING 0xFFFF
128+
#define BNXT_RE_DBR_PACING_TIME 5 /* ms */
129+
#define BNXT_RE_PACING_ALGO_THRESHOLD 250 /* Entries in DB FIFO */
130+
#define BNXT_RE_PACING_ALARM_TH_MULTIPLE 2 /* Multiple of pacing algo threshold */
131+
/* Default do_pacing value when there is no congestion */
132+
#define BNXT_RE_DBR_DO_PACING_NO_CONGESTION 0x7F /* 1 in 512 probability */
133+
#define BNXT_RE_DB_FIFO_ROOM_MASK 0x1FFF8000
134+
#define BNXT_RE_MAX_FIFO_DEPTH 0x2c00
135+
#define BNXT_RE_DB_FIFO_ROOM_SHIFT 15
136+
#define BNXT_RE_GRC_FIFO_REG_BASE 0x2000
137+
115138
struct bnxt_re_dev {
116139
struct ib_device ibdev;
117140
struct list_head list;
118141
unsigned long flags;
119142
#define BNXT_RE_FLAG_NETDEV_REGISTERED 0
120-
#define BNXT_RE_FLAG_GOT_MSIX 2
121143
#define BNXT_RE_FLAG_HAVE_L2_REF 3
122144
#define BNXT_RE_FLAG_RCFW_CHANNEL_EN 4
123145
#define BNXT_RE_FLAG_QOS_WORK_REG 5
@@ -152,16 +174,9 @@ struct bnxt_re_dev {
152174
struct bnxt_qplib_res qplib_res;
153175
struct bnxt_qplib_dpi dpi_privileged;
154176

155-
atomic_t qp_count;
156177
struct mutex qp_lock; /* protect qp list */
157178
struct list_head qp_list;
158179

159-
atomic_t cq_count;
160-
atomic_t srq_count;
161-
atomic_t mr_count;
162-
atomic_t mw_count;
163-
atomic_t ah_count;
164-
atomic_t pd_count;
165180
/* Max of 2 lossless traffic class supported per port */
166181
u16 cosq[2];
167182

@@ -171,6 +186,9 @@ struct bnxt_re_dev {
171186
atomic_t nq_alloc_cnt;
172187
u32 is_virtfn;
173188
u32 num_vfs;
189+
struct bnxt_re_pacing pacing;
190+
struct work_struct dbq_fifo_check_work;
191+
struct delayed_work dbq_pacing_work;
174192
};
175193

176194
#define to_bnxt_re_dev(ptr, member) \
@@ -181,6 +199,7 @@ struct bnxt_re_dev {
181199
#define BNXT_RE_ROCEV2_IPV6_PACKET 3
182200

183201
#define BNXT_RE_CHECK_RC(x) ((x) && ((x) != -ETIMEDOUT))
202+
void bnxt_re_pacing_alert(struct bnxt_re_dev *rdev);
184203

185204
static inline struct device *rdev_to_dev(struct bnxt_re_dev *rdev)
186205
{

0 commit comments

Comments
 (0)