|
12 | 12 | * All rights reserved. |
13 | 13 | * Copyright (c) 2017 Los Alamos National Security, LLC. All rights |
14 | 14 | * reserved. |
| 15 | + * Copyright (c) 2022 IBM Corporation. All rights reserved. |
15 | 16 | * $COPYRIGHT$ |
16 | 17 | * |
17 | 18 | * Additional copyrights may follow |
@@ -55,6 +56,7 @@ static int hooks_support = 0; |
55 | 56 | static opal_list_t release_cb_list; |
56 | 57 | static opal_atomic_lock_t release_lock; |
57 | 58 | static int release_run_callbacks; |
| 59 | +static int is_initialized = false; |
58 | 60 |
|
59 | 61 | /** |
60 | 62 | * Finalize the memory hooks subsystem |
@@ -93,6 +95,7 @@ int opal_mem_hooks_init(void) |
93 | 95 | OBJ_CONSTRUCT(&release_cb_list, opal_list_t); |
94 | 96 |
|
95 | 97 | opal_atomic_lock_init(&release_lock, OPAL_ATOMIC_LOCK_UNLOCKED); |
| 98 | + is_initialized = true; |
96 | 99 |
|
97 | 100 | /* delay running callbacks until there is something in the |
98 | 101 | registration */ |
@@ -196,11 +199,40 @@ int opal_mem_hooks_unregister_release(opal_mem_hooks_callback_fn_t *func) |
196 | 199 | callback_list_item_t *cbitem, *found_item = NULL; |
197 | 200 | int ret = OPAL_ERR_NOT_FOUND; |
198 | 201 |
|
| 202 | +// I've added "is_initialized" to allow this call to be safe even if |
| 203 | +// a memory hooks .so was merely loaded but never used so this file's |
| 204 | +// init function was never called. I'll give more context, first |
| 205 | +// describing a bug hit in open-shmem: |
| 206 | +// |
| 207 | +// Ordinarily the expected behavior of memhook users is they'd register a |
| 208 | +// callback and then deregister it in a nice matched pair. And I think |
| 209 | +// most OMPI code does, but the open-shmem code isn't as clear and it |
| 210 | +// was loading a callback and just leaving it loaded after open-shmem was |
| 211 | +// unloaded. This was a problem because upon every malloc/free/etc we're |
| 212 | +// going to keep trying to call their callback, and the function pointer |
| 213 | +// itself became illegal as soon as the open-shmem shared lib was unloaded. |
| 214 | +// So I figured the best solution was to add a "safety valve" to the callback |
| 215 | +// users where they would have a library-level destructor that un-conditionally |
| 216 | +// adds an extra unregister call regardless of whether the code is already |
| 217 | +// matched or not. |
| 218 | +// |
| 219 | +// With that happening, it's necessary to make sure |
| 220 | +// opal_mem_hooks_unregister_release() is safe when the system isn't |
| 221 | +// initialized and/or if it was initialized but the specified callback |
| 222 | +// is already removed. |
| 223 | +// |
| 224 | +// Note also, the reason for checking "cbitem" before looking at cbitem->cbfunc |
| 225 | +// is when the list is empty the OPAL_LIST_FOREACH() empirically still iterates |
| 226 | +// once and gives a null cbitem. I'm not sure I like that, but that's what |
| 227 | +// it did so I needed to make that case safe too. |
| 228 | + if (!is_initialized) { |
| 229 | + return 0; |
| 230 | + } |
199 | 231 | opal_atomic_lock(&release_lock); |
200 | 232 |
|
201 | 233 | /* make sure the callback isn't already in the list */ |
202 | 234 | OPAL_LIST_FOREACH (cbitem, &release_cb_list, callback_list_item_t) { |
203 | | - if (cbitem->cbfunc == func) { |
| 235 | + if (cbitem && cbitem->cbfunc == func) { |
204 | 236 | opal_list_remove_item(&release_cb_list, (opal_list_item_t *) cbitem); |
205 | 237 | found_item = cbitem; |
206 | 238 | ret = OPAL_SUCCESS; |
|
0 commit comments