Skip to content

Commit 2ef2763

Browse files
committed
btl/openib: fix inconsistency in the default settings
This commit fixes an inconsistency between btl_openib_receive_queues, btl_openib_max_send_size and btl_openib_eager_limit. Before this commit, if the ini file specified a set of default receive queues that happened not to contain one large enough for the default max_send_size or eager_limit, users would see an error like: "WARNING: The largest queue pair buffer size specified in the btl_openib_receive_queues MCA parameter is smaller than the maximum send size (i.e., the btl_openib_max_send_size MCA parameter), meaning that no queue is large enough to receive the largest possible incoming message fragment. The OpenFabrics (openib) BTL will therefore be deactivated for this run. Local host: somehost Largest buffer size: 65536 Maximum send fragment size: 131072". This commit adds code that detects the source of the max_send_size and eager_limit values and sets either or both of them to the size supported by the largest queue pair if both 1) the value is larger than the largest queue pair size, and 2) the value was not set by the user or an MCA configuration file. Signed-off-by: Nathan Hjelm <[email protected]>
1 parent f89cc3c commit 2ef2763

File tree

1 file changed

+44
-9
lines changed

1 file changed

+44
-9
lines changed

opal/mca/btl/openib/btl_openib_component.c

Lines changed: 44 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1178,6 +1178,16 @@ static void init_apm_port(mca_btl_openib_device_t *device, int port, uint16_t li
11781178
}
11791179
}
11801180

1181+
/* Look up the "opal"/"btl"/"openib" MCA variable called var_name and query
 * where its current value came from.
 *
 * var_name: variable name without the framework/component prefix, e.g.
 *           "receive_queues", "max_send_size" or "eager_limit".
 * source:   out-pointer handed to mca_base_var_get_value() as its source
 *           argument; not touched when the variable lookup fails.
 *
 * Returns the negative value from mca_base_var_find() if the lookup fails,
 * otherwise whatever mca_base_var_get_value() returns. */
static int get_var_source (const char *var_name, mca_base_var_source_t *source)
1182+
{
1183+
int vari = mca_base_var_find ("opal", "btl", "openib", var_name);
1184+
if (0 > vari) {
1185+
return vari;
1186+
}
1187+
1188+
return mca_base_var_get_value (vari, NULL, source, NULL);
1189+
}
1190+
11811191
static int setup_qps(void)
11821192
{
11831193
char **queues, **params = NULL;
@@ -1386,6 +1396,33 @@ static int setup_qps(void)
13861396
mca_btl_openib_module.super.btl_max_send_size) ?
13871397
mca_btl_openib_module.super.btl_eager_limit :
13881398
mca_btl_openib_module.super.btl_max_send_size;
1399+
1400+
if (max_qp_size < max_size_needed) {
1401+
mca_base_var_source_t eager_source = MCA_BASE_VAR_SOURCE_DEFAULT;
1402+
mca_base_var_source_t max_send_source = MCA_BASE_VAR_SOURCE_DEFAULT;
1403+
1404+
(void) get_var_source ("max_send_size", &max_send_source);
1405+
(void) get_var_source ("eager_limit", &eager_source);
1406+
1407+
/* the largest queue pair is too small for either the max send size or eager
1408+
* limit. check where we got the max_send_size and eager_limit and adjust if
1409+
* the user did not specify one or the other. */
1410+
if (mca_btl_openib_module.super.btl_eager_limit > max_qp_size &&
1411+
MCA_BASE_VAR_SOURCE_DEFAULT == eager_source) {
1412+
mca_btl_openib_module.super.btl_eager_limit = max_qp_size;
1413+
}
1414+
1415+
if (mca_btl_openib_module.super.btl_max_send_size > max_qp_size &&
1416+
MCA_BASE_VAR_SOURCE_DEFAULT == max_send_source) {
1417+
mca_btl_openib_module.super.btl_max_send_size = max_qp_size;
1418+
}
1419+
1420+
max_size_needed = (mca_btl_openib_module.super.btl_eager_limit >
1421+
mca_btl_openib_module.super.btl_max_send_size) ?
1422+
mca_btl_openib_module.super.btl_eager_limit :
1423+
mca_btl_openib_module.super.btl_max_send_size;
1424+
}
1425+
13891426
if (max_qp_size < max_size_needed) {
13901427
opal_show_help("help-mpi-btl-openib.txt",
13911428
"biggest qp size is too small", true,
@@ -2089,16 +2126,14 @@ static int init_one_device(opal_list_t *btl_list, struct ibv_device* ib_dev)
20892126
* altered it via MPI_T */
20902127
int index;
20912128
mca_base_var_source_t source;
2092-
index = mca_base_var_find("opal","btl","openib","receive_queues");
2093-
if (index >= 0) {
2094-
if (OPAL_SUCCESS != (ret = mca_base_var_get_value(index, NULL, &source, NULL))) {
2095-
BTL_ERROR(("mca_base_var_get_value failed to get value for receive_queues: %s:%d",
2096-
__FILE__, __LINE__));
2097-
goto error;
2098-
} else {
2099-
mca_btl_openib_component.receive_queues_source = source;
2100-
}
2129+
2130+
if (OPAL_SUCCESS != (ret = get_var_source ("receive_queues", &source))) {
2131+
BTL_ERROR(("mca_base_var_get_value failed to get value for receive_queues: %s:%d",
2132+
__FILE__, __LINE__));
2133+
goto error;
21012134
}
2135+
2136+
mca_btl_openib_component.receive_queues_source = source;
21022137
}
21032138

21042139
/* If the MCA param was specified, skip all the checks */

0 commit comments

Comments
 (0)