Skip to content

Commit 2b5715f

Browse files
nmorey and jgunthorpe
authored and committed
RDMA/srp: Fix support for unpopulated and unbalanced NUMA nodes
The current code computes a number of channels per SRP target and spreads them equally across all online NUMA nodes. Each channel is then assigned a CPU within this node. In the case of unbalanced, or even unpopulated nodes, some channels do not get a CPU associated and thus do not get connected. This causes the SRP connection to fail. This patch solves the issue by rewriting channel computation and allocation: - Drop channel to node/CPU association as it had no real effect on locality but added unnecessary complexity. - Tweak the number of channels allocated to reduce CPU contention when possible: - Up to one channel per CPU (instead of up to 4 by node) - At least 4 channels per node, unless ch_count module parameter is used. Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Nicolas Morey-Chaisemartin <[email protected]> Reviewed-by: Bart Van Assche <[email protected]> Signed-off-by: Jason Gunthorpe <[email protected]>
1 parent 68ad4d1 commit 2b5715f

File tree

1 file changed

+45
-65
lines changed

1 file changed

+45
-65
lines changed

drivers/infiniband/ulp/srp/ib_srp.c

Lines changed: 45 additions & 65 deletions
Original file line number | Diff line number | Diff line change
@@ -3628,7 +3628,7 @@ static ssize_t srp_create_target(struct device *dev,
36283628
struct srp_rdma_ch *ch;
36293629
struct srp_device *srp_dev = host->srp_dev;
36303630
struct ib_device *ibdev = srp_dev->dev;
3631-
int ret, node_idx, node, cpu, i;
3631+
int ret, i, ch_idx;
36323632
unsigned int max_sectors_per_mr, mr_per_cmd = 0;
36333633
bool multich = false;
36343634
uint32_t max_iu_len;
@@ -3753,81 +3753,61 @@ static ssize_t srp_create_target(struct device *dev,
37533753
goto out;
37543754

37553755
ret = -ENOMEM;
3756-
if (target->ch_count == 0)
3756+
if (target->ch_count == 0) {
37573757
target->ch_count =
3758-
max_t(unsigned int, num_online_nodes(),
3759-
min(ch_count ?:
3760-
min(4 * num_online_nodes(),
3761-
ibdev->num_comp_vectors),
3762-
num_online_cpus()));
3758+
min(ch_count ?:
3759+
max(4 * num_online_nodes(),
3760+
ibdev->num_comp_vectors),
3761+
num_online_cpus());
3762+
}
3763+
37633764
target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
37643765
GFP_KERNEL);
37653766
if (!target->ch)
37663767
goto out;
37673768

3768-
node_idx = 0;
3769-
for_each_online_node(node) {
3770-
const int ch_start = (node_idx * target->ch_count /
3771-
num_online_nodes());
3772-
const int ch_end = ((node_idx + 1) * target->ch_count /
3773-
num_online_nodes());
3774-
const int cv_start = node_idx * ibdev->num_comp_vectors /
3775-
num_online_nodes();
3776-
const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors /
3777-
num_online_nodes();
3778-
int cpu_idx = 0;
3779-
3780-
for_each_online_cpu(cpu) {
3781-
if (cpu_to_node(cpu) != node)
3782-
continue;
3783-
if (ch_start + cpu_idx >= ch_end)
3784-
continue;
3785-
ch = &target->ch[ch_start + cpu_idx];
3786-
ch->target = target;
3787-
ch->comp_vector = cv_start == cv_end ? cv_start :
3788-
cv_start + cpu_idx % (cv_end - cv_start);
3789-
spin_lock_init(&ch->lock);
3790-
INIT_LIST_HEAD(&ch->free_tx);
3791-
ret = srp_new_cm_id(ch);
3792-
if (ret)
3793-
goto err_disconnect;
3769+
for (ch_idx = 0; ch_idx < target->ch_count; ++ch_idx) {
3770+
ch = &target->ch[ch_idx];
3771+
ch->target = target;
3772+
ch->comp_vector = ch_idx % ibdev->num_comp_vectors;
3773+
spin_lock_init(&ch->lock);
3774+
INIT_LIST_HEAD(&ch->free_tx);
3775+
ret = srp_new_cm_id(ch);
3776+
if (ret)
3777+
goto err_disconnect;
37943778

3795-
ret = srp_create_ch_ib(ch);
3796-
if (ret)
3797-
goto err_disconnect;
3779+
ret = srp_create_ch_ib(ch);
3780+
if (ret)
3781+
goto err_disconnect;
37983782

3799-
ret = srp_alloc_req_data(ch);
3800-
if (ret)
3801-
goto err_disconnect;
3783+
ret = srp_alloc_req_data(ch);
3784+
if (ret)
3785+
goto err_disconnect;
38023786

3803-
ret = srp_connect_ch(ch, max_iu_len, multich);
3804-
if (ret) {
3805-
char dst[64];
3806-
3807-
if (target->using_rdma_cm)
3808-
snprintf(dst, sizeof(dst), "%pIS",
3809-
&target->rdma_cm.dst);
3810-
else
3811-
snprintf(dst, sizeof(dst), "%pI6",
3812-
target->ib_cm.orig_dgid.raw);
3813-
shost_printk(KERN_ERR, target->scsi_host,
3814-
PFX "Connection %d/%d to %s failed\n",
3815-
ch_start + cpu_idx,
3816-
target->ch_count, dst);
3817-
if (node_idx == 0 && cpu_idx == 0) {
3818-
goto free_ch;
3819-
} else {
3820-
srp_free_ch_ib(target, ch);
3821-
srp_free_req_data(target, ch);
3822-
target->ch_count = ch - target->ch;
3823-
goto connected;
3824-
}
3825-
}
3787+
ret = srp_connect_ch(ch, max_iu_len, multich);
3788+
if (ret) {
3789+
char dst[64];
38263790

3827-
multich = true;
3828-
cpu_idx++;
3791+
if (target->using_rdma_cm)
3792+
snprintf(dst, sizeof(dst), "%pIS",
3793+
&target->rdma_cm.dst);
3794+
else
3795+
snprintf(dst, sizeof(dst), "%pI6",
3796+
target->ib_cm.orig_dgid.raw);
3797+
shost_printk(KERN_ERR, target->scsi_host,
3798+
PFX "Connection %d/%d to %s failed\n",
3799+
ch_idx,
3800+
target->ch_count, dst);
3801+
if (ch_idx == 0) {
3802+
goto free_ch;
3803+
} else {
3804+
srp_free_ch_ib(target, ch);
3805+
srp_free_req_data(target, ch);
3806+
target->ch_count = ch - target->ch;
3807+
goto connected;
3808+
}
38293809
}
3830-
node_idx++;
3810+
multich = true;
38313811
}
38323812

38333813
connected:

0 commit comments

Comments
 (0)