Skip to content

Commit e3e039b

Browse files
authored
Merge pull request #6473 from abouteiller/backport/5975/v4.0.x
v4.0.x: Avoid a double lock interlock when calling pmix_finalize
2 parents 5e3cf1e + cf34de3 commit e3e039b

File tree

4 files changed

+56
-9
lines changed

4 files changed

+56
-9
lines changed

opal/mca/pmix/ext2x/ext2x_client.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
99
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
1010
* reserved.
11+
* Copyright (c) 2018 The University of Tennessee and The University
12+
* of Tennessee Research Foundation. All rights
13+
* reserved.
1114
* $COPYRIGHT$
1215
*
1316
* Additional copyrights may follow
@@ -165,6 +168,8 @@ int ext2x_client_finalize(void)
165168
{
166169
pmix_status_t rc;
167170
opal_ext2x_event_t *event, *ev2;
171+
opal_list_t evlist;
172+
OBJ_CONSTRUCT(&evlist, opal_list_t);
168173

169174
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
170175
"PMIx_client finalize");
@@ -178,12 +183,19 @@ int ext2x_client_finalize(void)
178183
OPAL_PMIX_DESTRUCT_LOCK(&event->lock);
179184
OPAL_PMIX_CONSTRUCT_LOCK(&event->lock);
180185
PMIx_Deregister_event_handler(event->index, dereg_cbfunc, (void*)event);
181-
OPAL_PMIX_WAIT_THREAD(&event->lock);
182186
opal_list_remove_item(&mca_pmix_ext2x_component.events, &event->super);
183-
OBJ_RELEASE(event);
187+
/* wait and release outside the loop to avoid double mutex
188+
* interlock */
189+
opal_list_append(&evlist, &event->super);
184190
}
185191
}
186192
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
193+
OPAL_LIST_FOREACH_SAFE(event, ev2, &evlist, opal_ext2x_event_t) {
194+
OPAL_PMIX_WAIT_THREAD(&event->lock);
195+
opal_list_remove_item(&evlist, &event->super);
196+
OBJ_RELEASE(event);
197+
}
198+
OBJ_DESTRUCT(&evlist);
187199
rc = PMIx_Finalize(NULL, 0);
188200

189201
return ext2x_convert_rc(rc);

opal/mca/pmix/ext2x/ext2x_server_south.c

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
1010
* Copyright (c) 2017 Los Alamos National Security, LLC. All rights
1111
* reserved.
12+
* Copyright (c) 2018 The University of Tennessee and The University
13+
* of Tennessee Research Foundation. All rights
14+
* reserved.
1215
* $COPYRIGHT$
1316
*
1417
* Additional copyrights may follow
@@ -180,6 +183,8 @@ int ext2x_server_finalize(void)
180183
{
181184
pmix_status_t rc;
182185
opal_ext2x_event_t *event, *ev2;
186+
opal_list_t evlist;
187+
OBJ_CONSTRUCT(&evlist, opal_list_t);
183188

184189
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
185190
--opal_pmix_base.initialized;
@@ -190,13 +195,19 @@ int ext2x_server_finalize(void)
190195
OPAL_PMIX_DESTRUCT_LOCK(&event->lock);
191196
OPAL_PMIX_CONSTRUCT_LOCK(&event->lock);
192197
PMIx_Deregister_event_handler(event->index, dereg_cbfunc, (void*)event);
193-
OPAL_PMIX_WAIT_THREAD(&event->lock);
194198
opal_list_remove_item(&mca_pmix_ext2x_component.events, &event->super);
195-
OBJ_RELEASE(event);
199+
/* wait and release outside the loop to avoid double mutex
200+
* interlock */
201+
opal_list_append(&evlist, &event->super);
196202
}
197203
}
198204
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
199-
205+
OPAL_LIST_FOREACH_SAFE(event, ev2, &evlist, opal_ext2x_event_t) {
206+
OPAL_PMIX_WAIT_THREAD(&event->lock);
207+
opal_list_remove_item(&evlist, &event->super);
208+
OBJ_RELEASE(event);
209+
}
210+
OBJ_DESTRUCT(&evlist);
200211
rc = PMIx_server_finalize();
201212
return ext2x_convert_rc(rc);
202213
}

opal/mca/pmix/pmix3x/pmix3x_client.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
99
* Copyright (c) 2016 Los Alamos National Security, LLC. All rights
1010
* reserved.
11+
* Copyright (c) 2017-2018 The University of Tennessee and The University
12+
* of Tennessee Research Foundation. All rights
13+
* reserved.
1114
* $COPYRIGHT$
1215
*
1316
* Additional copyrights may follow
@@ -169,6 +172,8 @@ int pmix3x_client_finalize(void)
169172
{
170173
pmix_status_t rc;
171174
opal_pmix3x_event_t *event, *ev2;
175+
opal_list_t evlist;
176+
OBJ_CONSTRUCT(&evlist, opal_list_t);
172177

173178
opal_output_verbose(1, opal_pmix_base_framework.framework_output,
174179
"PMIx_client finalize");
@@ -182,12 +187,19 @@ int pmix3x_client_finalize(void)
182187
OPAL_PMIX_DESTRUCT_LOCK(&event->lock);
183188
OPAL_PMIX_CONSTRUCT_LOCK(&event->lock);
184189
PMIx_Deregister_event_handler(event->index, dereg_cbfunc, (void*)event);
185-
OPAL_PMIX_WAIT_THREAD(&event->lock);
186190
opal_list_remove_item(&mca_pmix_pmix3x_component.events, &event->super);
187-
OBJ_RELEASE(event);
191+
/* wait and release outside the loop to avoid double mutex
192+
* interlock */
193+
opal_list_append(&evlist, &event->super);
188194
}
189195
}
190196
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
197+
OPAL_LIST_FOREACH_SAFE(event, ev2, &evlist, opal_pmix3x_event_t) {
198+
OPAL_PMIX_WAIT_THREAD(&event->lock);
199+
opal_list_remove_item(&evlist, &event->super);
200+
OBJ_RELEASE(event);
201+
}
202+
OBJ_DESTRUCT(&evlist);
191203
rc = PMIx_Finalize(NULL, 0);
192204

193205
return pmix3x_convert_rc(rc);

opal/mca/pmix/pmix3x/pmix3x_server_south.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
* Copyright (c) 2016 Cisco Systems, Inc. All rights reserved.
1010
* Copyright (c) 2017 Los Alamos National Security, LLC. All rights
1111
* reserved.
12+
* Copyright (c) 2017-2018 The University of Tennessee and The University
13+
* of Tennessee Research Foundation. All rights
14+
* reserved.
1215
* $COPYRIGHT$
1316
*
1417
* Additional copyrights may follow
@@ -186,6 +189,8 @@ int pmix3x_server_finalize(void)
186189
{
187190
pmix_status_t rc;
188191
opal_pmix3x_event_t *event, *ev2;
192+
opal_list_t evlist;
193+
OBJ_CONSTRUCT(&evlist, opal_list_t);
189194

190195
OPAL_PMIX_ACQUIRE_THREAD(&opal_pmix_base.lock);
191196
--opal_pmix_base.initialized;
@@ -196,12 +201,19 @@ int pmix3x_server_finalize(void)
196201
OPAL_PMIX_DESTRUCT_LOCK(&event->lock);
197202
OPAL_PMIX_CONSTRUCT_LOCK(&event->lock);
198203
PMIx_Deregister_event_handler(event->index, dereg_cbfunc, (void*)event);
199-
OPAL_PMIX_WAIT_THREAD(&event->lock);
200204
opal_list_remove_item(&mca_pmix_pmix3x_component.events, &event->super);
201-
OBJ_RELEASE(event);
205+
/* wait and release outside the loop to avoid double mutex
206+
* interlock */
207+
opal_list_append(&evlist, &event->super);
202208
}
203209
}
204210
OPAL_PMIX_RELEASE_THREAD(&opal_pmix_base.lock);
211+
OPAL_LIST_FOREACH_SAFE(event, ev2, &evlist, opal_pmix3x_event_t) {
212+
OPAL_PMIX_WAIT_THREAD(&event->lock);
213+
opal_list_remove_item(&evlist, &event->super);
214+
OBJ_RELEASE(event);
215+
}
216+
OBJ_DESTRUCT(&evlist);
205217
rc = PMIx_server_finalize();
206218
return pmix3x_convert_rc(rc);
207219
}

0 commit comments

Comments
 (0)