Skip to content

Commit 3031aff

Browse files
committed
Fix openib process accounting if procs was dynamically added.
1 parent 400af6c commit 3031aff

File tree

3 files changed

+181
-49
lines changed

3 files changed

+181
-49
lines changed

opal/mca/btl/openib/btl_openib.c

Lines changed: 105 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1004,7 +1004,8 @@ int mca_btl_openib_add_procs(
10041004
opal_bitmap_t* reachable)
10051005
{
10061006
mca_btl_openib_module_t* openib_btl = (mca_btl_openib_module_t*)btl;
1007-
int i,j, rc, local_procs;
1007+
size_t nprocs_new_loc = 0, nprocs_new = 0;
1008+
int i,j, rc;
10081009
int lcl_subnet_id_port_cnt = 0;
10091010
int btl_rank = 0;
10101011
volatile mca_btl_base_endpoint_t* endpoint;
@@ -1037,24 +1038,74 @@ int mca_btl_openib_add_procs(
10371038
return rc;
10381039
}
10391040

1040-
rc = openib_btl_size_queues(openib_btl, nprocs);
1041+
/* prepare all proc's and account them properly */
1042+
for (i = 0, nprocs_new_loc = 0 ; i < (int) nprocs; i++) {
1043+
struct opal_proc_t* proc = procs[i];
1044+
mca_btl_openib_proc_t* ib_proc;
1045+
1046+
#if defined(HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE)
1047+
/* Most current iWARP adapters (June 2008) cannot handle
1048+
talking to other processes on the same host (!) -- so mark
1049+
them as unreachable (need to use sm). So for the moment,
1050+
we'll just mark any local peer on an iWARP NIC as
1051+
unreachable. See trac ticket #1352. */
1052+
if (IBV_TRANSPORT_IWARP == openib_btl->device->ib_dev->transport_type &&
1053+
OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) {
1054+
continue;
1055+
}
1056+
#endif
1057+
1058+
if(NULL == (ib_proc = mca_btl_openib_proc_get_locked(proc)) ) {
1059+
/* if we don't have connection info for this process, it's
1060+
* okay because some other method might be able to reach it,
1061+
* so just mark it as unreachable by us */
1062+
continue;
1063+
}
1064+
1065+
/* account this openib_btl in this proc */
1066+
rc = mca_btl_openib_proc_reg_btl(ib_proc, openib_btl);
1067+
1068+
opal_mutex_unlock( &ib_proc->proc_lock );
1069+
1070+
switch( rc ){
1071+
case OPAL_SUCCESS:
1072+
/* this is a new process to this openib btl */
1073+
nprocs_new++;
1074+
if (OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) {
1075+
nprocs_new_loc ++;
1076+
}
1077+
break;
1078+
case OPAL_ERR_RESOURCE_BUSY:
1079+
/* process was accounted earlier in this openib btl */
1080+
break;
1081+
default:
1082+
/* unexpected error, e.g. out of mem */
1083+
return rc;
1084+
}
1085+
}
1086+
1087+
/* account this procs if need */
1088+
rc = openib_btl_size_queues(openib_btl, nprocs_new);
10411089
if (OPAL_SUCCESS != rc) {
10421090
BTL_ERROR(("error creating cqs"));
10431091
return rc;
10441092
}
10451093

1046-
for (i = 0, local_procs = 0 ; i < (int) nprocs; i++) {
1094+
opal_mutex_lock(&openib_btl->device->device_lock);
1095+
openib_btl->local_procs += nprocs_new_loc;
1096+
if( 0 < nprocs_new_loc ){
1097+
openib_btl->device->mem_reg_max = openib_btl->device->mem_reg_max_total / openib_btl->local_procs;
1098+
}
1099+
opal_mutex_unlock(&openib_btl->device->device_lock);
1100+
1101+
/* prepare endpoints */
1102+
for (i = 0, nprocs_new_loc = 0 ; i < (int) nprocs; i++) {
10471103
struct opal_proc_t* proc = procs[i];
10481104
mca_btl_openib_proc_t* ib_proc;
10491105
bool found_existing = false;
1050-
bool is_new;
10511106

10521107
opal_output(-1, "add procs: adding proc %d", i);
10531108

1054-
if (OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) {
1055-
local_procs ++;
1056-
}
1057-
10581109
#if defined(HAVE_STRUCT_IBV_DEVICE_TRANSPORT_TYPE)
10591110
/* Most current iWARP adapters (June 2008) cannot handle
10601111
talking to other processes on the same host (!) -- so mark
@@ -1067,7 +1118,7 @@ int mca_btl_openib_add_procs(
10671118
}
10681119
#endif
10691120

1070-
if(NULL == (ib_proc = mca_btl_openib_proc_get_locked(proc, &is_new)) ) {
1121+
if(NULL == (ib_proc = mca_btl_openib_proc_get_locked(proc)) ) {
10711122
/* if we don't have connection info for this process, it's
10721123
* okay because some other method might be able to reach it,
10731124
* so just mark it as unreachable by us */
@@ -1076,13 +1127,11 @@ int mca_btl_openib_add_procs(
10761127

10771128
found_existing = false;
10781129

1079-
if( !is_new ){
1080-
for (j = 0 ; j < (int) ib_proc->proc_endpoint_count ; ++j) {
1081-
endpoint = ib_proc->proc_endpoints[j];
1082-
if (endpoint->endpoint_btl == openib_btl) {
1083-
found_existing = true;
1084-
break;
1085-
}
1130+
for (j = 0 ; j < (int) ib_proc->proc_endpoint_count ; ++j) {
1131+
endpoint = ib_proc->proc_endpoints[j];
1132+
if (endpoint->endpoint_btl == openib_btl) {
1133+
found_existing = true;
1134+
break;
10861135
}
10871136
}
10881137

@@ -1104,11 +1153,6 @@ int mca_btl_openib_add_procs(
11041153

11051154
}
11061155

1107-
opal_mutex_lock(&openib_btl->ib_lock);
1108-
openib_btl->local_procs += local_procs;
1109-
openib_btl->device->mem_reg_max = openib_btl->device->mem_reg_max_total / openib_btl->local_procs;
1110-
opal_mutex_unlock(&openib_btl->ib_lock);
1111-
11121156
return OPAL_SUCCESS;
11131157
}
11141158

@@ -1119,7 +1163,7 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul
11191163
mca_btl_openib_proc_t *ib_proc;
11201164
int rc;
11211165
int local_port_cnt = 0, btl_rank;
1122-
bool is_new;
1166+
size_t nprocs_new = 0;
11231167

11241168
rc = prepare_device_for_use (openib_btl->device);
11251169
if (OPAL_SUCCESS != rc) {
@@ -1133,25 +1177,51 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul
11331177
return NULL;
11341178
}
11351179

1136-
rc = openib_btl_size_queues(openib_btl, 1);
1137-
if (OPAL_SUCCESS != rc) {
1138-
BTL_ERROR(("error creating cqs"));
1139-
return NULL;
1140-
}
1141-
1142-
if (NULL == (ib_proc = mca_btl_openib_proc_get_locked(proc, &is_new))) {
1180+
if (NULL == (ib_proc = mca_btl_openib_proc_get_locked(proc))) {
11431181
/* if we don't have connection info for this process, it's
11441182
* okay because some other method might be able to reach it,
11451183
* so just mark it as unreachable by us */
11461184
return NULL;
11471185
}
11481186

1149-
if( !is_new ){
1150-
for (size_t j = 0 ; j < ib_proc->proc_endpoint_count ; ++j) {
1151-
endpoint = ib_proc->proc_endpoints[j];
1152-
if (endpoint->endpoint_btl == openib_btl) {
1153-
goto exit;
1154-
}
1187+
rc = mca_btl_openib_proc_reg_btl(ib_proc, openib_btl);
1188+
1189+
switch( rc ){
1190+
case OPAL_SUCCESS:
1191+
/* unlock first to avoid possible deadlocks */
1192+
opal_mutex_unlock(&ib_proc->proc_lock);
1193+
1194+
/* this is a new process to this openib btl
1195+
* account this procs if need */
1196+
rc = openib_btl_size_queues(openib_btl, nprocs_new);
1197+
if (OPAL_SUCCESS != rc) {
1198+
BTL_ERROR(("error creating cqs"));
1199+
return NULL;
1200+
}
1201+
1202+
if( OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags) ) {
1203+
opal_mutex_lock(&openib_btl->ib_lock);
1204+
openib_btl->local_procs += 1;
1205+
openib_btl->device->mem_reg_max = openib_btl->device->mem_reg_max_total / openib_btl->local_procs;
1206+
opal_mutex_unlock(&openib_btl->ib_lock);
1207+
}
1208+
1209+
/* lock process back */
1210+
opal_mutex_lock(&ib_proc->proc_lock);
1211+
break;
1212+
case OPAL_ERR_RESOURCE_BUSY:
1213+
/* process was accounted earlier in this openib btl */
1214+
break;
1215+
default:
1216+
/* unexpected error, e.g. out of mem */
1217+
BTL_ERROR(("Unexpected OPAL error %d", rc));
1218+
return NULL;
1219+
}
1220+
1221+
for (size_t j = 0 ; j < ib_proc->proc_endpoint_count ; ++j) {
1222+
endpoint = ib_proc->proc_endpoints[j];
1223+
if (endpoint->endpoint_btl == openib_btl) {
1224+
goto exit;
11551225
}
11561226
}
11571227

@@ -1168,14 +1238,6 @@ struct mca_btl_base_endpoint_t *mca_btl_openib_get_ep (struct mca_btl_base_modul
11681238
exit:
11691239
opal_mutex_unlock(&ib_proc->proc_lock);
11701240

1171-
if ( (NULL != endpoint) && is_new &&
1172-
OPAL_PROC_ON_LOCAL_NODE(proc->proc_flags)) {
1173-
opal_mutex_lock(&openib_btl->ib_lock);
1174-
openib_btl->local_procs += 1;
1175-
openib_btl->device->mem_reg_max = openib_btl->device->mem_reg_max_total / openib_btl->local_procs;
1176-
opal_mutex_unlock(&openib_btl->ib_lock);
1177-
}
1178-
11791241
return (struct mca_btl_base_endpoint_t *)endpoint;
11801242
}
11811243

opal/mca/btl/openib/btl_openib_proc.c

Lines changed: 56 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,23 @@
2929
#include "connect/base.h"
3030
#include "connect/connect.h"
3131

32+
static void mca_btl_openib_proc_btl_construct(mca_btl_openib_proc_btlptr_t* elem);
33+
static void mca_btl_openib_proc_btl_destruct(mca_btl_openib_proc_btlptr_t* elem);
34+
35+
OBJ_CLASS_INSTANCE(mca_btl_openib_proc_btlptr_t,
36+
opal_list_item_t, mca_btl_openib_proc_btl_construct,
37+
mca_btl_openib_proc_btl_destruct);
38+
39+
static void mca_btl_openib_proc_btl_construct(mca_btl_openib_proc_btlptr_t* elem)
40+
{
41+
elem->openib_btl = NULL;
42+
}
43+
44+
static void mca_btl_openib_proc_btl_destruct(mca_btl_openib_proc_btlptr_t* elem)
45+
{
46+
elem->openib_btl = NULL;
47+
}
48+
3249
static void mca_btl_openib_proc_construct(mca_btl_openib_proc_t* proc);
3350
static void mca_btl_openib_proc_destruct(mca_btl_openib_proc_t* proc);
3451

@@ -44,6 +61,7 @@ void mca_btl_openib_proc_construct(mca_btl_openib_proc_t* ib_proc)
4461
ib_proc->proc_endpoints = 0;
4562
ib_proc->proc_endpoint_count = 0;
4663
OBJ_CONSTRUCT(&ib_proc->proc_lock, opal_mutex_t);
64+
OBJ_CONSTRUCT(&ib_proc->openib_btls, opal_list_t);
4765
}
4866

4967
/*
@@ -52,6 +70,8 @@ void mca_btl_openib_proc_construct(mca_btl_openib_proc_t* ib_proc)
5270

5371
void mca_btl_openib_proc_destruct(mca_btl_openib_proc_t* ib_proc)
5472
{
73+
mca_btl_openib_proc_btlptr_t* elem;
74+
5575
/* release resources */
5676
if(NULL != ib_proc->proc_endpoints) {
5777
free(ib_proc->proc_endpoints);
@@ -68,6 +88,13 @@ void mca_btl_openib_proc_destruct(mca_btl_openib_proc_t* ib_proc)
6888
free(ib_proc->proc_ports);
6989
}
7090
OBJ_DESTRUCT(&ib_proc->proc_lock);
91+
92+
elem = (mca_btl_openib_proc_btlptr_t*)opal_list_remove_first(&ib_proc->openib_btls);
93+
while( NULL != elem ){
94+
OBJ_RELEASE(elem);
95+
elem = (mca_btl_openib_proc_btlptr_t*)opal_list_remove_first(&ib_proc->openib_btls);
96+
}
97+
OBJ_DESTRUCT(&ib_proc->openib_btls);
7198
}
7299

73100

@@ -123,7 +150,7 @@ static void inline unpack8(char **src, uint8_t *value)
123150
* associated w/ a given destination on this datastructure.
124151
*/
125152

126-
mca_btl_openib_proc_t* mca_btl_openib_proc_get_locked(opal_proc_t* proc, bool *is_new)
153+
mca_btl_openib_proc_t* mca_btl_openib_proc_get_locked(opal_proc_t* proc)
127154
{
128155
mca_btl_openib_proc_t *ib_proc = NULL, *ib_proc_ret = NULL;
129156
size_t msg_size;
@@ -133,7 +160,7 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_get_locked(opal_proc_t* proc, bool *i
133160
char *offset;
134161
int modex_message_size;
135162
mca_btl_openib_modex_message_t dummy;
136-
*is_new = false;
163+
bool is_new = false;
137164

138165
/* Check if we have already created a IB proc
139166
* structure for this ompi process */
@@ -252,7 +279,7 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_get_locked(opal_proc_t* proc, bool *i
252279
if (0 == ib_proc->proc_port_count) {
253280
ib_proc->proc_endpoints = NULL;
254281
} else {
255-
ib_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
282+
ib_proc->proc_endpoints = (volatile mca_btl_base_endpoint_t**)
256283
malloc(ib_proc->proc_port_count *
257284
sizeof(mca_btl_base_endpoint_t*));
258285
}
@@ -273,7 +300,7 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_get_locked(opal_proc_t* proc, bool *i
273300
opal_mutex_lock(&ib_proc->proc_lock);
274301
opal_list_append(&mca_btl_openib_component.ib_procs, &ib_proc->super);
275302
ib_proc_ret = ib_proc;
276-
*is_new = true;
303+
is_new = true;
277304
} else {
278305
/* otherwise - release module_proc */
279306
OBJ_RELEASE(ib_proc);
@@ -282,7 +309,7 @@ mca_btl_openib_proc_t* mca_btl_openib_proc_get_locked(opal_proc_t* proc, bool *i
282309

283310
/* if we haven't insert the process - lock it here so we
284311
* won't lock mca_btl_openib_component.ib_lock */
285-
if( !(*is_new) ){
312+
if( !is_new ){
286313
opal_mutex_lock(&ib_proc_ret->proc_lock);
287314
}
288315

@@ -354,3 +381,27 @@ int mca_btl_openib_proc_insert(mca_btl_openib_proc_t* module_proc,
354381
module_proc->proc_endpoints[module_proc->proc_endpoint_count++] = module_endpoint;
355382
return OPAL_SUCCESS;
356383
}
384+
385+
int mca_btl_openib_proc_reg_btl(mca_btl_openib_proc_t* ib_proc,
386+
mca_btl_openib_module_t* openib_btl)
387+
{
388+
mca_btl_openib_proc_btlptr_t* elem;
389+
390+
391+
for(elem = (mca_btl_openib_proc_btlptr_t*)opal_list_get_first(&ib_proc->openib_btls);
392+
elem != (mca_btl_openib_proc_btlptr_t*)opal_list_get_end(&ib_proc->openib_btls);
393+
elem = (mca_btl_openib_proc_btlptr_t*)opal_list_get_next(elem)) {
394+
if(elem->openib_btl == openib_btl) {
395+
/* this is normal return meaning that this BTL has already touched this ib_proc */
396+
return OPAL_ERR_RESOURCE_BUSY;
397+
}
398+
}
399+
400+
elem = OBJ_NEW(mca_btl_openib_proc_btlptr_t);
401+
if( NULL == elem ){
402+
return OPAL_ERR_OUT_OF_RESOURCE;
403+
}
404+
elem->openib_btl = openib_btl;
405+
opal_list_append(&ib_proc->openib_btls, &elem->super);
406+
return OPAL_SUCCESS;
407+
}

opal/mca/btl/openib/btl_openib_proc.h

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,19 @@ typedef struct mca_btl_openib_proc_modex_t {
5252
uint8_t pm_cpc_data_count;
5353
} mca_btl_openib_proc_modex_t;
5454

55+
/**
56+
* The list element to hold pointers to openin_btls that are using this
57+
* ib_proc.
58+
*/
59+
60+
struct mca_btl_openib_proc_btlptr_t {
61+
opal_list_item_t super;
62+
mca_btl_openib_module_t* openib_btl;
63+
};
64+
typedef struct mca_btl_openib_proc_btlptr_t mca_btl_openib_proc_btlptr_t;
65+
66+
OBJ_CLASS_DECLARATION(mca_btl_openib_proc_btlptr_t);
67+
5568
/**
5669
* Represents the state of a remote process and the set of addresses
5770
* that it exports. Also cache an instance of mca_btl_base_endpoint_t for
@@ -71,6 +84,9 @@ struct mca_btl_openib_proc_t {
7184
/** length of proc_ports array */
7285
uint8_t proc_port_count;
7386

87+
/** list of openib_btl's that touched this proc **/
88+
opal_list_t openib_btls;
89+
7490
/** array of endpoints that have been created to access this proc */
7591
volatile struct mca_btl_base_endpoint_t **proc_endpoints;
7692

@@ -84,10 +100,13 @@ typedef struct mca_btl_openib_proc_t mca_btl_openib_proc_t;
84100

85101
OBJ_CLASS_DECLARATION(mca_btl_openib_proc_t);
86102

87-
mca_btl_openib_proc_t* mca_btl_openib_proc_get_locked(opal_proc_t* proc, bool *is_new);
103+
mca_btl_openib_proc_t* mca_btl_openib_proc_get_locked(opal_proc_t* proc);
88104
int mca_btl_openib_proc_insert(mca_btl_openib_proc_t*, mca_btl_base_endpoint_t*);
89105
int mca_btl_openib_proc_remove(opal_proc_t* proc,
90106
mca_btl_base_endpoint_t* module_endpoint);
107+
int mca_btl_openib_proc_reg_btl(mca_btl_openib_proc_t* ib_proc,
108+
mca_btl_openib_module_t* openib_btl);
109+
91110

92111
END_C_DECLS
93112

0 commit comments

Comments
 (0)