1212 * All rights reserved.
1313 * Copyright (c) 2007-2013 Cisco Systems, Inc. All rights reserved.
1414 * Copyright (c) 2006-2015 Mellanox Technologies. All rights reserved.
15- * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights
15+ * Copyright (c) 2006-2016 Los Alamos National Security, LLC. All rights
1616 * reserved.
1717 * Copyright (c) 2006-2007 Voltaire All rights reserved.
1818 * Copyright (c) 2008-2012 Oracle and/or its affiliates. All rights reserved.
@@ -399,14 +399,16 @@ static int create_srq(mca_btl_openib_module_t *openib_btl)
399399 }
400400 }
401401
402+ openib_btl -> srqs_created = true;
403+
402404 return OPAL_SUCCESS ;
403405}
404406
405407static int openib_btl_prepare (struct mca_btl_openib_module_t * openib_btl )
406408{
407409 int rc = OPAL_SUCCESS ;
408410 opal_mutex_lock (& openib_btl -> ib_lock );
409- if (0 == openib_btl -> num_peers &&
411+ if (! openib_btl -> srqs_created &&
410412 (mca_btl_openib_component .num_srq_qps > 0 ||
411413 mca_btl_openib_component .num_xrc_qps > 0 )) {
412414 rc = create_srq (openib_btl );
@@ -416,17 +418,12 @@ static int openib_btl_prepare(struct mca_btl_openib_module_t* openib_btl)
416418}
417419
418420
419- static int openib_btl_size_queues (struct mca_btl_openib_module_t * openib_btl , size_t nprocs )
421+ static int openib_btl_size_queues (struct mca_btl_openib_module_t * openib_btl )
420422{
421423 uint32_t send_cqes , recv_cqes ;
422424 int rc = OPAL_SUCCESS , qp ;
423425 mca_btl_openib_device_t * device = openib_btl -> device ;
424426
425- if ( 0 == nprocs ){
426- /* nothing to do */
427- return OPAL_SUCCESS ;
428- }
429-
430427 opal_mutex_lock (& openib_btl -> ib_lock );
431428 /* figure out reasonable sizes for completion queues */
432429 for (qp = 0 ; qp < mca_btl_openib_component .num_qps ; qp ++ ) {
@@ -435,7 +432,7 @@ static int openib_btl_size_queues(struct mca_btl_openib_module_t* openib_btl, si
435432 recv_cqes = mca_btl_openib_component .qp_infos [qp ].rd_num ;
436433 } else {
437434 send_cqes = (mca_btl_openib_component .qp_infos [qp ].rd_num +
438- mca_btl_openib_component .qp_infos [qp ].u .pp_qp .rd_rsv ) * nprocs ;
435+ mca_btl_openib_component .qp_infos [qp ].u .pp_qp .rd_rsv ) * openib_btl -> num_peers ;
439436 recv_cqes = send_cqes ;
440437 }
441438
@@ -455,7 +452,6 @@ static int openib_btl_size_queues(struct mca_btl_openib_module_t* openib_btl, si
455452 goto out ;
456453 }
457454
458- openib_btl -> num_peers += nprocs ;
459455out :
460456 opal_mutex_unlock (& openib_btl -> ib_lock );
461457 return rc ;
@@ -1028,10 +1024,14 @@ int mca_btl_openib_add_procs(
10281024 return rc ;
10291025 }
10301026
1031- rc = openib_btl_prepare (openib_btl );
1032- if (OPAL_SUCCESS != rc ) {
1033- BTL_ERROR (("could not prepare openib btl structure for usel" ));
1034- return rc ;
1027+ if (0 == openib_btl -> num_peers ) {
1028+ /* ensure completion queues are created before attempting to
1029+ * make a loop-back queue pair */
1030+ rc = openib_btl_size_queues (openib_btl );
1031+ if (OPAL_SUCCESS != rc ) {
1032+ BTL_ERROR (("error creating cqs" ));
1033+ return rc ;
1034+ }
10351035 }
10361036
10371037 /* prepare all proc's and account them properly */
@@ -1080,10 +1080,20 @@ int mca_btl_openib_add_procs(
10801080 }
10811081 }
10821082
1083- /* account this procs if need */
1084- rc = openib_btl_size_queues (openib_btl , nprocs_new );
1083+ if (nprocs_new ) {
1084+ OPAL_THREAD_ADD32 (& openib_btl -> num_peers , nprocs_new );
1085+
1086+ /* adjust cq sizes given the new procs */
1087+ rc = openib_btl_size_queues (openib_btl );
1088+ if (OPAL_SUCCESS != rc ) {
1089+ BTL_ERROR (("error creating cqs" ));
1090+ return rc ;
1091+ }
1092+ }
1093+
1094+ rc = openib_btl_prepare (openib_btl );
10851095 if (OPAL_SUCCESS != rc ) {
1086- BTL_ERROR (("error creating cqs " ));
1096+ BTL_ERROR (("could not prepare openib btl module for use " ));
10871097 return rc ;
10881098 }
10891099
@@ -1156,23 +1166,15 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul
11561166{
11571167 mca_btl_openib_module_t * openib_btl = (mca_btl_openib_module_t * ) btl ;
11581168 volatile mca_btl_base_endpoint_t * endpoint = NULL ;
1169+ int local_port_cnt = 0 , btl_rank , rc ;
11591170 mca_btl_openib_proc_t * ib_proc ;
1160- int rc ;
1161- int local_port_cnt = 0 , btl_rank ;
1162- size_t nprocs_new = 0 ;
11631171
11641172 rc = prepare_device_for_use (openib_btl -> device );
11651173 if (OPAL_SUCCESS != rc ) {
11661174 BTL_ERROR (("could not prepare openib device for use" ));
11671175 return NULL ;
11681176 }
11691177
1170- rc = openib_btl_prepare (openib_btl );
1171- if (OPAL_SUCCESS != rc ) {
1172- BTL_ERROR (("could not prepare openib btl structure for use" ));
1173- return NULL ;
1174- }
1175-
11761178 if (NULL == (ib_proc = mca_btl_openib_proc_get_locked (proc ))) {
11771179 /* if we don't have connection info for this process, it's
11781180 * okay because some other method might be able to reach it,
@@ -1189,7 +1191,8 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul
11891191
11901192 /* this is a new process to this openib btl
11911193 * account for this proc if needed */
1192- rc = openib_btl_size_queues (openib_btl , nprocs_new );
1194+ OPAL_THREAD_ADD32 (& openib_btl -> num_peers , 1 );
1195+ rc = openib_btl_size_queues (openib_btl );
11931196 if (OPAL_SUCCESS != rc ) {
11941197 BTL_ERROR (("error creating cqs" ));
11951198 return NULL ;
@@ -1214,6 +1217,12 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul
12141217 return NULL ;
12151218 }
12161219
1220+ rc = openib_btl_prepare (openib_btl );
1221+ if (OPAL_SUCCESS != rc ) {
1222+ BTL_ERROR (("could not prepare openib btl structure for use" ));
1223+ goto exit ;
1224+ }
1225+
12171226 for (size_t j = 0 ; j < ib_proc -> proc_endpoint_count ; ++ j ) {
12181227 endpoint = ib_proc -> proc_endpoints [j ];
12191228 if (endpoint -> endpoint_btl == openib_btl ) {
0 commit comments