Skip to content

Commit 936dfe5

Browse files
committed
Merge pull request #1592 from hjelmn/tcp_thread_fix
btl/tcp: fix add_procs race condition
2 parents 01c90d4 + 03f4a85 commit 936dfe5

File tree

1 file changed

+55
-40
lines changed

1 file changed

+55
-40
lines changed

opal/mca/btl/tcp/btl_tcp_proc.c

Lines changed: 55 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
* Copyright (c) 2013-2015 Intel, Inc. All rights reserved
1515
* Copyright (c) 2014-2015 Research Organization for Information Science
1616
* and Technology (RIST). All rights reserved.
17-
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
17+
* Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights
1818
* reserved.
1919
* Copyright (c) 2015 Cisco Systems, Inc. All rights reserved.
2020
* $COPYRIGHT$
@@ -122,52 +122,53 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(opal_proc_t* proc)
122122
return btl_proc;
123123
}
124124

125-
btl_proc = OBJ_NEW(mca_btl_tcp_proc_t);
126-
if(NULL == btl_proc)
127-
return NULL;
128-
btl_proc->proc_opal = proc;
129-
OBJ_RETAIN(btl_proc->proc_opal);
125+
do {
126+
btl_proc = OBJ_NEW(mca_btl_tcp_proc_t);
127+
if(NULL == btl_proc) {
128+
rc = OPAL_ERR_OUT_OF_RESOURCE;
129+
break;
130+
}
130131

131-
/* add to hash table of all proc instance */
132-
opal_proc_table_set_value(&mca_btl_tcp_component.tcp_procs,
133-
proc->proc_name, btl_proc);
134-
OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
132+
btl_proc->proc_opal = proc;
135133

136-
/* lookup tcp parameters exported by this proc */
137-
OPAL_MODEX_RECV(rc, &mca_btl_tcp_component.super.btl_version,
138-
&proc->proc_name, (uint8_t**)&btl_proc->proc_addrs, &size);
139-
if(rc != OPAL_SUCCESS) {
140-
if(OPAL_ERR_NOT_FOUND != rc)
141-
BTL_ERROR(("opal_modex_recv: failed with return value=%d", rc));
142-
OBJ_RELEASE(btl_proc);
143-
return NULL;
144-
}
145-
if(0 != (size % sizeof(mca_btl_tcp_addr_t))) {
146-
BTL_ERROR(("opal_modex_recv: invalid size %lu: btl-size: %lu\n",
147-
(unsigned long) size, (unsigned long)sizeof(mca_btl_tcp_addr_t)));
148-
return NULL;
149-
}
150-
btl_proc->proc_addr_count = size / sizeof(mca_btl_tcp_addr_t);
151-
152-
/* allocate space for endpoint array - one for each exported address */
153-
btl_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
154-
malloc((1 + btl_proc->proc_addr_count) *
155-
sizeof(mca_btl_base_endpoint_t*));
156-
if(NULL == btl_proc->proc_endpoints) {
157-
OBJ_RELEASE(btl_proc);
158-
return NULL;
159-
}
134+
OBJ_RETAIN(btl_proc->proc_opal);
135+
136+
/* lookup tcp parameters exported by this proc */
137+
OPAL_MODEX_RECV(rc, &mca_btl_tcp_component.super.btl_version,
138+
&proc->proc_name, (uint8_t**)&btl_proc->proc_addrs, &size);
139+
if(rc != OPAL_SUCCESS) {
140+
if(OPAL_ERR_NOT_FOUND != rc)
141+
BTL_ERROR(("opal_modex_recv: failed with return value=%d", rc));
142+
break;
143+
}
144+
145+
if(0 != (size % sizeof(mca_btl_tcp_addr_t))) {
146+
BTL_ERROR(("opal_modex_recv: invalid size %lu: btl-size: %lu\n",
147+
(unsigned long) size, (unsigned long)sizeof(mca_btl_tcp_addr_t)));
148+
rc = OPAL_ERROR;
149+
break;
150+
}
151+
152+
btl_proc->proc_addr_count = size / sizeof(mca_btl_tcp_addr_t);
153+
154+
/* allocate space for endpoint array - one for each exported address */
155+
btl_proc->proc_endpoints = (mca_btl_base_endpoint_t**)
156+
malloc((1 + btl_proc->proc_addr_count) *
157+
sizeof(mca_btl_base_endpoint_t*));
158+
if(NULL == btl_proc->proc_endpoints) {
159+
rc = OPAL_ERR_OUT_OF_RESOURCE;
160+
break;
161+
}
162+
163+
if(NULL == mca_btl_tcp_component.tcp_local && (proc == opal_proc_local_get())) {
164+
mca_btl_tcp_component.tcp_local = btl_proc;
165+
}
160166

161-
if(NULL == mca_btl_tcp_component.tcp_local && (proc == opal_proc_local_get())) {
162-
mca_btl_tcp_component.tcp_local = btl_proc;
163-
}
164-
{
165167
/* convert the OPAL addr_family field to OS constants,
166168
* so we can check for AF_INET (or AF_INET6) and don't have
167169
* to deal with byte ordering anymore.
168170
*/
169-
unsigned int i;
170-
for (i = 0; i < btl_proc->proc_addr_count; i++) {
171+
for (unsigned int i = 0; i < btl_proc->proc_addr_count; i++) {
171172
if (MCA_BTL_TCP_AF_INET == btl_proc->proc_addrs[i].addr_family) {
172173
btl_proc->proc_addrs[i].addr_family = AF_INET;
173174
}
@@ -177,7 +178,21 @@ mca_btl_tcp_proc_t* mca_btl_tcp_proc_create(opal_proc_t* proc)
177178
}
178179
#endif
179180
}
181+
} while (0);
182+
183+
if (OPAL_SUCCESS == rc) {
184+
/* add to hash table of all proc instance. */
185+
opal_proc_table_set_value(&mca_btl_tcp_component.tcp_procs,
186+
proc->proc_name, btl_proc);
187+
} else {
188+
if (btl_proc) {
189+
OBJ_RELEASE(btl_proc);
190+
btl_proc = NULL;
191+
}
180192
}
193+
194+
OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock);
195+
181196
return btl_proc;
182197
}
183198

0 commit comments

Comments
 (0)