@@ -1004,7 +1004,8 @@ int mca_btl_openib_add_procs(
10041004 opal_bitmap_t * reachable )
10051005{
10061006 mca_btl_openib_module_t * openib_btl = (mca_btl_openib_module_t * )btl ;
1007- int i ,j , rc , local_procs ;
1007+ size_t nprocs_new_loc = 0 , nprocs_new = 0 ;
1008+ int i ,j , rc ;
10081009 int lcl_subnet_id_port_cnt = 0 ;
10091010 int btl_rank = 0 ;
10101011 volatile mca_btl_base_endpoint_t * endpoint ;
@@ -1037,24 +1038,74 @@ int mca_btl_openib_add_procs(
10371038 return rc ;
10381039 }
10391040
1040- rc = openib_btl_size_queues (openib_btl , nprocs );
1041+ /* prepare all procs and account for them properly */
1042+ for (i = 0 , nprocs_new_loc = 0 ; i < (int ) nprocs ; i ++ ) {
1043+ struct opal_proc_t * proc = procs [i ];
1044+ mca_btl_openib_proc_t * ib_proc ;
1045+
1046+ #if defined(HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE )
1047+ /* Most current iWARP adapters (June 2008) cannot handle
1048+ talking to other processes on the same host (!) -- so mark
1049+ them as unreachable (need to use sm). So for the moment,
1050+ we'll just mark any local peer on an iWARP NIC as
1051+ unreachable. See trac ticket #1352. */
1052+ if (IBV_TRANSPORT_IWARP == openib_btl -> device -> ib_dev -> transport_type &&
1053+ OPAL_PROC_ON_LOCAL_NODE (proc -> proc_flags )) {
1054+ continue ;
1055+ }
1056+ #endif
1057+
1058+ if (NULL == (ib_proc = mca_btl_openib_proc_get_locked (proc )) ) {
1059+ /* if we don't have connection info for this process, it's
1060+ * okay because some other method might be able to reach it,
1061+ * so just mark it as unreachable by us */
1062+ continue ;
1063+ }
1064+
1065+ /* account for this openib_btl in this proc */
1066+ rc = mca_btl_openib_proc_reg_btl (ib_proc , openib_btl );
1067+
1068+ opal_mutex_unlock ( & ib_proc -> proc_lock );
1069+
1070+ switch ( rc ){
1071+ case OPAL_SUCCESS :
1072+ /* this is a new process to this openib btl */
1073+ nprocs_new ++ ;
1074+ if (OPAL_PROC_ON_LOCAL_NODE (proc -> proc_flags )) {
1075+ nprocs_new_loc ++ ;
1076+ }
1077+ break ;
1078+ case OPAL_ERR_RESOURCE_BUSY :
1079+ /* process was already accounted for in this openib btl */
1080+ break ;
1081+ default :
1082+ /* unexpected error, e.g. out of mem */
1083+ return rc ;
1084+ }
1085+ }
1086+
1087+ /* account for these procs if needed */
1088+ rc = openib_btl_size_queues (openib_btl , nprocs_new );
10411089 if (OPAL_SUCCESS != rc ) {
10421090 BTL_ERROR (("error creating cqs" ));
10431091 return rc ;
10441092 }
10451093
1046- for (i = 0 , local_procs = 0 ; i < (int ) nprocs ; i ++ ) {
1094+ opal_mutex_lock (& openib_btl -> device -> device_lock );
1095+ openib_btl -> local_procs += nprocs_new_loc ;
1096+ if ( 0 < nprocs_new_loc ){
1097+ openib_btl -> device -> mem_reg_max = openib_btl -> device -> mem_reg_max_total / openib_btl -> local_procs ;
1098+ }
1099+ opal_mutex_unlock (& openib_btl -> device -> device_lock );
1100+
1101+ /* prepare endpoints */
1102+ for (i = 0 , nprocs_new_loc = 0 ; i < (int ) nprocs ; i ++ ) {
10471103 struct opal_proc_t * proc = procs [i ];
10481104 mca_btl_openib_proc_t * ib_proc ;
10491105 bool found_existing = false;
1050- bool is_new ;
10511106
10521107 opal_output (-1 , "add procs: adding proc %d" , i );
10531108
1054- if (OPAL_PROC_ON_LOCAL_NODE (proc -> proc_flags )) {
1055- local_procs ++ ;
1056- }
1057-
10581109#if defined(HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE )
10591110 /* Most current iWARP adapters (June 2008) cannot handle
10601111 talking to other processes on the same host (!) -- so mark
@@ -1067,7 +1118,7 @@ int mca_btl_openib_add_procs(
10671118 }
10681119#endif
10691120
1070- if (NULL == (ib_proc = mca_btl_openib_proc_get_locked (proc , & is_new )) ) {
1121+ if (NULL == (ib_proc = mca_btl_openib_proc_get_locked (proc )) ) {
10711122 /* if we don't have connection info for this process, it's
10721123 * okay because some other method might be able to reach it,
10731124 * so just mark it as unreachable by us */
@@ -1076,13 +1127,11 @@ int mca_btl_openib_add_procs(
10761127
10771128 found_existing = false;
10781129
1079- if ( !is_new ){
1080- for (j = 0 ; j < (int ) ib_proc -> proc_endpoint_count ; ++ j ) {
1081- endpoint = ib_proc -> proc_endpoints [j ];
1082- if (endpoint -> endpoint_btl == openib_btl ) {
1083- found_existing = true;
1084- break ;
1085- }
1130+ for (j = 0 ; j < (int ) ib_proc -> proc_endpoint_count ; ++ j ) {
1131+ endpoint = ib_proc -> proc_endpoints [j ];
1132+ if (endpoint -> endpoint_btl == openib_btl ) {
1133+ found_existing = true;
1134+ break ;
10861135 }
10871136 }
10881137
@@ -1104,11 +1153,6 @@ int mca_btl_openib_add_procs(
11041153
11051154 }
11061155
1107- opal_mutex_lock (& openib_btl -> ib_lock );
1108- openib_btl -> local_procs += local_procs ;
1109- openib_btl -> device -> mem_reg_max = openib_btl -> device -> mem_reg_max_total / openib_btl -> local_procs ;
1110- opal_mutex_unlock (& openib_btl -> ib_lock );
1111-
11121156 return OPAL_SUCCESS ;
11131157}
11141158
@@ -1119,7 +1163,7 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul
11191163 mca_btl_openib_proc_t * ib_proc ;
11201164 int rc ;
11211165 int local_port_cnt = 0 , btl_rank ;
1122- bool is_new ;
1166+ size_t nprocs_new = 0 ;
11231167
11241168 rc = prepare_device_for_use (openib_btl -> device );
11251169 if (OPAL_SUCCESS != rc ) {
@@ -1133,25 +1177,51 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul
11331177 return NULL ;
11341178 }
11351179
1136- rc = openib_btl_size_queues (openib_btl , 1 );
1137- if (OPAL_SUCCESS != rc ) {
1138- BTL_ERROR (("error creating cqs" ));
1139- return NULL ;
1140- }
1141-
1142- if (NULL == (ib_proc = mca_btl_openib_proc_get_locked (proc , & is_new ))) {
1180+ if (NULL == (ib_proc = mca_btl_openib_proc_get_locked (proc ))) {
11431181 /* if we don't have connection info for this process, it's
11441182 * okay because some other method might be able to reach it,
11451183 * so just mark it as unreachable by us */
11461184 return NULL ;
11471185 }
11481186
1149- if ( !is_new ){
1150- for (size_t j = 0 ; j < ib_proc -> proc_endpoint_count ; ++ j ) {
1151- endpoint = ib_proc -> proc_endpoints [j ];
1152- if (endpoint -> endpoint_btl == openib_btl ) {
1153- goto exit ;
1154- }
1187+ rc = mca_btl_openib_proc_reg_btl (ib_proc , openib_btl );
1188+
1189+ switch ( rc ){
1190+ case OPAL_SUCCESS :
1191+ /* unlock first to avoid possible deadlocks */
1192+ opal_mutex_unlock (& ib_proc -> proc_lock );
1193+
1194+ /* this is a new process for this openib btl;
1195+ * account for it if needed */
1196+ rc = openib_btl_size_queues (openib_btl , nprocs_new );
1197+ if (OPAL_SUCCESS != rc ) {
1198+ BTL_ERROR (("error creating cqs" ));
1199+ return NULL ;
1200+ }
1201+
1202+ if ( OPAL_PROC_ON_LOCAL_NODE (proc -> proc_flags ) ) {
1203+ opal_mutex_lock (& openib_btl -> ib_lock );
1204+ openib_btl -> local_procs += 1 ;
1205+ openib_btl -> device -> mem_reg_max = openib_btl -> device -> mem_reg_max_total / openib_btl -> local_procs ;
1206+ opal_mutex_unlock (& openib_btl -> ib_lock );
1207+ }
1208+
1209+ /* re-acquire the proc lock */
1210+ opal_mutex_lock (& ib_proc -> proc_lock );
1211+ break ;
1212+ case OPAL_ERR_RESOURCE_BUSY :
1213+ /* process was already accounted for in this openib btl */
1214+ break ;
1215+ default :
1216+ /* unexpected error, e.g. out of mem */
1217+ BTL_ERROR (("Unexpected OPAL error %d" , rc ));
1218+ return NULL ;
1219+ }
1220+
1221+ for (size_t j = 0 ; j < ib_proc -> proc_endpoint_count ; ++ j ) {
1222+ endpoint = ib_proc -> proc_endpoints [j ];
1223+ if (endpoint -> endpoint_btl == openib_btl ) {
1224+ goto exit ;
11551225 }
11561226 }
11571227
@@ -1168,14 +1238,6 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul
11681238exit :
11691239 opal_mutex_unlock (& ib_proc -> proc_lock );
11701240
1171- if ( (NULL != endpoint ) && is_new &&
1172- OPAL_PROC_ON_LOCAL_NODE (proc -> proc_flags )) {
1173- opal_mutex_lock (& openib_btl -> ib_lock );
1174- openib_btl -> local_procs += 1 ;
1175- openib_btl -> device -> mem_reg_max = openib_btl -> device -> mem_reg_max_total / openib_btl -> local_procs ;
1176- opal_mutex_unlock (& openib_btl -> ib_lock );
1177- }
1178-
11791241 return (struct mca_btl_base_endpoint_t * )endpoint ;
11801242}
11811243
0 commit comments