diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index 83afc0e83f231..24db61c288822 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -1759,8 +1759,6 @@ typedef int kmp_itt_mark_t; #define KMP_ITT_DEBUG 0 #endif /* USE_ITT_BUILD */ -typedef kmp_int32 kmp_critical_name[8]; - /*! @ingroup PARALLEL The type for a microtask which gets passed to @ref __kmpc_fork_call(). @@ -3510,6 +3508,8 @@ extern int __kmp_abort_delay; extern int __kmp_need_register_atfork_specified; extern int __kmp_need_register_atfork; /* At initialization, call pthread_atfork to install fork handler */ +extern int __kmp_already_registered_atfork; /* Do not register atfork twice */ +extern int __kmp_in_atexit; /* Denote that we are in the atexit handler */ extern int __kmp_gtid_mode; /* Method of getting gtid, values: 0 - not set, will be set at runtime 1 - using stack search @@ -4561,6 +4561,7 @@ static inline void __kmp_resume_if_hard_paused() { __kmp_pause_status = kmp_not_paused; } } +extern void __kmp_hard_pause_reinitialize(const bool in_child_atfork_handler); extern void __kmp_omp_display_env(int verbose); diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp index 3ca32ba583fe2..075a90b9de7ee 100644 --- a/openmp/runtime/src/kmp_csupport.cpp +++ b/openmp/runtime/src/kmp_csupport.cpp @@ -1165,6 +1165,8 @@ __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc, // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx); } KMP_DEBUG_ASSERT(*lck != NULL); + // save the reverse critical section global lock reference + ilk->rev_ptr_critSec = crit; } // Fast-path acquire tas lock diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp index 323d13e948b42..df64d40a086ef 100644 --- a/openmp/runtime/src/kmp_global.cpp +++ b/openmp/runtime/src/kmp_global.cpp @@ -414,6 +414,12 @@ int __kmp_need_register_atfork = TRUE; /* At initialization, call pthread_atfork to install fork handler */ int 
__kmp_need_register_atfork_specified = TRUE; +/* We do not want to repeatedly register the atfork handler, because we + * lock things (in __kmp_forkjoin_lock()) in the prepare handler; if the same + * prepare handler gets called multiple times, then it will always deadlock */ +int __kmp_already_registered_atfork = FALSE; +int __kmp_in_atexit = FALSE; /* Denote that we are in the atexit handler */ + int __kmp_env_stksize = FALSE; /* KMP_STACKSIZE specified? */ int __kmp_env_blocktime = FALSE; /* KMP_BLOCKTIME specified? */ int __kmp_env_checks = FALSE; /* KMP_CHECKS specified? */ diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp index fd1300352e95b..626b98c7ce92b 100644 --- a/openmp/runtime/src/kmp_lock.cpp +++ b/openmp/runtime/src/kmp_lock.cpp @@ -3431,6 +3431,9 @@ void __kmp_cleanup_indirect_user_locks() { ll)); __kmp_free(ll->lock); ll->lock = NULL; + // reset the reverse critical section pointer to 0 + if (ll->rev_ptr_critSec && !UNLIKELY(__kmp_in_atexit)) + memset(ll->rev_ptr_critSec, 0, sizeof(kmp_critical_name)); } __kmp_indirect_lock_pool[k] = NULL; } @@ -3449,6 +3452,9 @@ void __kmp_cleanup_indirect_user_locks() { "from table\n", l)); __kmp_free(l->lock); + // reset the reverse critical section pointer to 0 + if (l->rev_ptr_critSec && !UNLIKELY(__kmp_in_atexit)) + memset(l->rev_ptr_critSec, 0, sizeof(kmp_critical_name)); } } __kmp_free(ptr->table[row]); diff --git a/openmp/runtime/src/kmp_lock.h b/openmp/runtime/src/kmp_lock.h index 6202f3d617cc5..fc71bafd47e39 100644 --- a/openmp/runtime/src/kmp_lock.h +++ b/openmp/runtime/src/kmp_lock.h @@ -38,6 +38,9 @@ extern "C" { struct ident; typedef struct ident ident_t; +// moved the typedef kmp_critical_name from kmp.h to here. +typedef kmp_int32 kmp_critical_name[8]; + // End of copied code. 
// ---------------------------------------------------------------------------- @@ -1126,6 +1129,14 @@ typedef enum { typedef struct { kmp_user_lock_p lock; kmp_indirect_locktag_t type; + // NOTE: when a `#pragma omp critical` lock gets created, the corresponding + // critical section global lock needs to point to a lock. When we reset the + // locks (via omp_pause_resource_all(omp_pause_hard)), these critical section + // global lock pointers need to also be reset back to NULL (in + // __kmp_cleanup_indirect_user_locks()). However, we will not reset the + // `rev_ptr_critSec` lock during the atexit() cleanup handler, since the + // memory of `rev_ptr_critSec` is/could be freed already + kmp_critical_name *rev_ptr_critSec; } kmp_indirect_lock_t; // Function tables for direct locks. Set/unset/test differentiate functions diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 48e29c9f9fe45..266645e22c6fb 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -6139,6 +6139,7 @@ void __kmp_internal_end_atexit(void) { Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there is nothing. Thus, the workaround is applicable only for Windows static stat library. */ + __kmp_in_atexit = TRUE; __kmp_internal_end_library(-1); #if KMP_OS_WINDOWS __kmp_close_console(); @@ -6952,9 +6953,9 @@ void __kmp_unregister_library(void) { value = __kmp_env_get(name); #endif - KMP_DEBUG_ASSERT(__kmp_registration_flag != 0); - KMP_DEBUG_ASSERT(__kmp_registration_str != NULL); - if (value != NULL && strcmp(value, __kmp_registration_str) == 0) { + // if omp is not initialized and we exit, then we don't need to free anything + if (__kmp_registration_flag != 0 && __kmp_registration_str != NULL) { + if (value != NULL && strcmp(value, __kmp_registration_str) == 0) { // Ok, this is our variable. Delete it. 
#if defined(KMP_USE_SHM) if (__kmp_shm_available) { @@ -6967,7 +6968,7 @@ #else __kmp_env_unset(name); #endif - } + } #if defined(KMP_USE_SHM) if (shm_name) KMP_INTERNAL_FREE(shm_name); if (temp_reg_status_file_name) KMP_INTERNAL_FREE(temp_reg_status_file_name); #endif - KMP_INTERNAL_FREE(__kmp_registration_str); + } + KMP_INTERNAL_FREE(value); KMP_INTERNAL_FREE(name); @@ -9055,6 +9057,9 @@ void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; } void __kmp_hard_pause() { __kmp_pause_status = kmp_hard_paused; __kmp_internal_end_thread(-1); + // TODO: we'll do the same thing as child atfork handler, since we need to + // serially initialize the runtime library after __kmp_hard_pause() + __kmp_hard_pause_reinitialize(false); } // Soft resume sets __kmp_pause_status, and wakes up all threads. @@ -9361,6 +9366,97 @@ void __kmp_set_nesting_mode_threads() { set__max_active_levels(thread, __kmp_nesting_mode_nlevels); } +void __kmp_hard_pause_reinitialize(const bool in_child_atfork_handler) { +#if KMP_AFFINITY_SUPPORTED +#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY || \ + KMP_OS_AIX + // reset the affinity in the child to the initial thread + // affinity in the parent + kmp_set_thread_affinity_mask_initial(); +#endif + // Set default not to bind threads tightly in the child (we're expecting + // over-subscription after the fork and this can improve things for + // scripting languages that use OpenMP inside process-parallel code). 
+ if (__kmp_nested_proc_bind.bind_types != NULL) { + __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; + } + for (kmp_affinity_t *affinity : __kmp_affinities) + *affinity = KMP_AFFINITY_INIT(affinity->env_var); + __kmp_affin_fullMask = nullptr; + __kmp_affin_origMask = nullptr; + __kmp_topology = nullptr; +#endif // KMP_AFFINITY_SUPPORTED + + // TODO: resetting these global variables might not be needed if we are not in + // child handler as `__kmp_cleanup()` would have most likely reset them + // already + +#if KMP_USE_MONITOR + __kmp_init_monitor = 0; +#endif + __kmp_init_parallel = FALSE; + __kmp_init_middle = FALSE; + __kmp_init_serial = FALSE; + TCW_4(__kmp_init_gtid, FALSE); + __kmp_init_common = FALSE; + + TCW_4(__kmp_init_user_locks, FALSE); +#if !KMP_USE_DYNAMIC_LOCK + __kmp_user_lock_table.used = 1; + __kmp_user_lock_table.allocated = 0; + __kmp_user_lock_table.table = NULL; + __kmp_lock_blocks = NULL; +#endif + + __kmp_all_nth = 0; + TCW_4(__kmp_nth, 0); + + __kmp_thread_pool = NULL; + __kmp_thread_pool_insert_pt = NULL; + __kmp_team_pool = NULL; + + // The threadprivate cache will be cleared in `__kmp_cleanup()` + if (in_child_atfork_handler) { + /* Must actually zero all the *cache arguments passed to + __kmpc_threadprivate here so threadprivate doesn't use stale data */ + KA_TRACE(10, ("__kmp_atfork_child: checking cache address list %p\n", + __kmp_threadpriv_cache_list)); + + while (__kmp_threadpriv_cache_list != NULL) { + + if (*__kmp_threadpriv_cache_list->addr != NULL) { + KC_TRACE(50, ("__kmp_atfork_child: zeroing cache at address %p\n", + &(*__kmp_threadpriv_cache_list->addr))); + + *__kmp_threadpriv_cache_list->addr = NULL; + } + __kmp_threadpriv_cache_list = __kmp_threadpriv_cache_list->next; + } + + /* reset statically initialized locks */ + __kmp_init_bootstrap_lock(&__kmp_initz_lock); + __kmp_init_bootstrap_lock(&__kmp_stdio_lock); + __kmp_init_bootstrap_lock(&__kmp_console_lock); + __kmp_init_bootstrap_lock(&__kmp_task_team_lock); + } 
+ +#if USE_ITT_BUILD + __kmp_itt_reset(); // reset ITT's global state +#endif /* USE_ITT_BUILD */ + + { + // Child process often get terminated without any use of OpenMP. That might + // cause mapped shared memory file to be left unattended. Thus we postpone + // library registration till middle initialization in the child process. + + // After we do a `__kmpc_pause_resource()`, the omp runtime must also be in + // serially initialized state in order to not break the assumptions of + // compiler+runtime implementation + __kmp_need_register_serial = FALSE; + __kmp_serial_initialize(); + } +} + #if ENABLE_LIBOMPTARGET void (*kmp_target_sync_cb)(ident_t *loc_ref, int gtid, void *current_task, void *event) = NULL; diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp index 368c0b6e872cc..cd48fd3fe9506 100644 --- a/openmp/runtime/src/z_Linux_util.cpp +++ b/openmp/runtime/src/z_Linux_util.cpp @@ -1312,86 +1312,11 @@ static void __kmp_atfork_child(void) { ++__kmp_fork_count; -#if KMP_AFFINITY_SUPPORTED -#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY || \ - KMP_OS_AIX - // reset the affinity in the child to the initial thread - // affinity in the parent - kmp_set_thread_affinity_mask_initial(); -#endif - // Set default not to bind threads tightly in the child (we're expecting - // over-subscription after the fork and this can improve things for - // scripting languages that use OpenMP inside process-parallel code). 
- if (__kmp_nested_proc_bind.bind_types != NULL) { - __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; - } - for (kmp_affinity_t *affinity : __kmp_affinities) - *affinity = KMP_AFFINITY_INIT(affinity->env_var); - __kmp_affin_fullMask = nullptr; - __kmp_affin_origMask = nullptr; - __kmp_topology = nullptr; -#endif // KMP_AFFINITY_SUPPORTED - -#if KMP_USE_MONITOR - __kmp_init_monitor = 0; -#endif - __kmp_init_parallel = FALSE; - __kmp_init_middle = FALSE; - __kmp_init_serial = FALSE; - TCW_4(__kmp_init_gtid, FALSE); - __kmp_init_common = FALSE; - - TCW_4(__kmp_init_user_locks, FALSE); -#if !KMP_USE_DYNAMIC_LOCK - __kmp_user_lock_table.used = 1; - __kmp_user_lock_table.allocated = 0; - __kmp_user_lock_table.table = NULL; - __kmp_lock_blocks = NULL; -#endif - - __kmp_all_nth = 0; - TCW_4(__kmp_nth, 0); - - __kmp_thread_pool = NULL; - __kmp_thread_pool_insert_pt = NULL; - __kmp_team_pool = NULL; - - /* Must actually zero all the *cache arguments passed to __kmpc_threadprivate - here so threadprivate doesn't use stale data */ - KA_TRACE(10, ("__kmp_atfork_child: checking cache address list %p\n", - __kmp_threadpriv_cache_list)); - - while (__kmp_threadpriv_cache_list != NULL) { - - if (*__kmp_threadpriv_cache_list->addr != NULL) { - KC_TRACE(50, ("__kmp_atfork_child: zeroing cache at address %p\n", - &(*__kmp_threadpriv_cache_list->addr))); - - *__kmp_threadpriv_cache_list->addr = NULL; - } - __kmp_threadpriv_cache_list = __kmp_threadpriv_cache_list->next; - } + // re-use the same re-initialization code as __kmp_hard_pause() + __kmp_hard_pause_reinitialize(true); __kmp_init_runtime = FALSE; - /* reset statically initialized locks */ - __kmp_init_bootstrap_lock(&__kmp_initz_lock); - __kmp_init_bootstrap_lock(&__kmp_stdio_lock); - __kmp_init_bootstrap_lock(&__kmp_console_lock); - __kmp_init_bootstrap_lock(&__kmp_task_team_lock); - -#if USE_ITT_BUILD - __kmp_itt_reset(); // reset ITT's global state -#endif /* USE_ITT_BUILD */ - - { - // Child process often get
terminated without any use of OpenMP. That might - // cause mapped shared memory file to be left unattended. Thus we postpone - // library registration till middle initialization in the child process. - __kmp_need_register_serial = FALSE; - __kmp_serial_initialize(); - } - /* This is necessary to make sure no stale data is left around */ /* AC: customers complain that we use unsafe routines in the atfork handler. Mathworks: dlsym() is unsafe. We call dlsym and dlopen @@ -1404,13 +1329,15 @@ static void __kmp_atfork_child(void) { } void __kmp_register_atfork(void) { - if (__kmp_need_register_atfork) { + // NOTE: we will not double register our fork handlers! It will cause deadlock + if (!__kmp_already_registered_atfork && __kmp_need_register_atfork) { #if !KMP_OS_WASI int status = pthread_atfork(__kmp_atfork_prepare, __kmp_atfork_parent, __kmp_atfork_child); KMP_CHECK_SYSFAIL("pthread_atfork", status); #endif __kmp_need_register_atfork = FALSE; + __kmp_already_registered_atfork = TRUE; } } diff --git a/openmp/runtime/test/api/omp_pause_resource.c b/openmp/runtime/test/api/omp_pause_resource.c index e4aaa51861b8e..6154377b9c03a 100644 --- a/openmp/runtime/test/api/omp_pause_resource.c +++ b/openmp/runtime/test/api/omp_pause_resource.c @@ -4,8 +4,132 @@ // UNSUPPORTED: icc-18, icc-19 #include <stdio.h> +#include <assert.h> +#include <stdlib.h> +#include <sys/wait.h> +#include <unistd.h> #include "omp_testsuite.h" +#define NUM_THREADS 3 + +void doOmpWorkWithCritical(int *a_lockCtr, int *b_lockCtr) { +#pragma omp parallel num_threads(NUM_THREADS) + { +#pragma omp critical(a_lock) + { + *a_lockCtr = *a_lockCtr + 1; + } +#pragma omp critical(b_lock) + { + *b_lockCtr = *b_lockCtr + 1; + } + } +} + +void test_omp_critical_after_omp_hard_pause_resource_all() { + int a_lockCtr = 0, b_lockCtr = 0; + + // use omp to do some work + doOmpWorkWithCritical(&a_lockCtr, &b_lockCtr); + assert(a_lockCtr == NUM_THREADS && b_lockCtr == NUM_THREADS); + a_lockCtr = b_lockCtr = 0; // reset the counters + + // omp hard pause should succeed + int 
rc = omp_pause_resource_all(omp_pause_hard); + assert(rc == 0); + + // we should not segfault inside the critical sections of doOmpWork() + doOmpWorkWithCritical(&a_lockCtr, &b_lockCtr); + assert(a_lockCtr == NUM_THREADS && b_lockCtr == NUM_THREADS); +} + +void test_omp_get_thread_num_after_omp_hard_pause_resource_all() { + // omp_get_thread_num() should work, even if omp is not yet initialized + int n = omp_get_thread_num(); + // called from serial region, omp_get_thread_num() should return 0 + assert(n == 0); + +// use omp to do some work, guarantees omp initialization +#pragma omp parallel num_threads(NUM_THREADS) + { + } + + // omp hard pause should succeed + int rc = omp_pause_resource_all(omp_pause_hard); + assert(rc == 0); + + // omp_get_thread_num() should work again with no segfault + n = omp_get_thread_num(); + // called from serial region, omp_get_thread_num() should return 0 + assert(n == 0); +} + +void test_omp_parallel_num_threads_after_omp_hard_pause_resource_all() { +// use omp to do some work +#pragma omp parallel num_threads(NUM_THREADS) + { + } + + // omp hard pause should succeed + int rc = omp_pause_resource_all(omp_pause_hard); + assert(rc == 0); + +// this should not trigger any omp asserts +#pragma omp parallel num_threads(NUM_THREADS) + { + } +} + +void test_KMP_INIT_AT_FORK_with_fork_after_omp_hard_pause_resource_all() { + // explicitly set the KMP_INIT_AT_FORK environment variable to 1 + setenv("KMP_INIT_AT_FORK", "1", 1); + +// use omp to do some work +#pragma omp parallel for num_threads(NUM_THREADS) + for (int i = 0; i < NUM_THREADS; ++i) { + } + + // omp hard pause should succeed + int rc = omp_pause_resource_all(omp_pause_hard); + assert(rc == 0); + +// use omp to do some work +#pragma omp parallel for num_threads(NUM_THREADS) + for (int i = 0; i < NUM_THREADS; ++i) { + } + + // we'll fork .. 
this shouldn't deadlock + int p = fork(); + + if (!p) { + exit(0); // child simply does nothing and exits + } + + waitpid(p, NULL, 0); + + unsetenv("KMP_INIT_AT_FORK"); +} + +void test_fork_child_exiting_after_omp_hard_pause_resource_all() { +// use omp to do some work +#pragma omp parallel num_threads(NUM_THREADS) + { + } + + // omp hard pause should succeed + int rc = omp_pause_resource_all(omp_pause_hard); + assert(rc == 0); + + int p = fork(); + + if (!p) { + // child should be able to exit properly without assert failures + exit(0); + } + + waitpid(p, NULL, 0); +} + int test_omp_pause_resource() { int fails, nthreads, my_dev; @@ -57,6 +181,11 @@ int main() { if (!test_omp_pause_resource()) { num_failed++; } + test_omp_critical_after_omp_hard_pause_resource_all(); + test_omp_get_thread_num_after_omp_hard_pause_resource_all(); + test_omp_parallel_num_threads_after_omp_hard_pause_resource_all(); + test_KMP_INIT_AT_FORK_with_fork_after_omp_hard_pause_resource_all(); + test_fork_child_exiting_after_omp_hard_pause_resource_all(); } return num_failed; }