Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions openmp/runtime/src/kmp.h
Original file line number Diff line number Diff line change
Expand Up @@ -1759,8 +1759,6 @@ typedef int kmp_itt_mark_t;
#define KMP_ITT_DEBUG 0
#endif /* USE_ITT_BUILD */

typedef kmp_int32 kmp_critical_name[8];

/*!
@ingroup PARALLEL
The type for a microtask which gets passed to @ref __kmpc_fork_call().
Expand Down Expand Up @@ -3510,6 +3508,8 @@ extern int __kmp_abort_delay;
extern int __kmp_need_register_atfork_specified;
extern int __kmp_need_register_atfork; /* At initialization, call pthread_atfork
to install fork handler */
extern int __kmp_already_registered_atfork; /* Do not register atfork twice */
extern int __kmp_in_atexit; /*Denote that we are in the atexit handler*/
extern int __kmp_gtid_mode; /* Method of getting gtid, values:
0 - not set, will be set at runtime
1 - using stack search
Expand Down Expand Up @@ -4561,6 +4561,7 @@ static inline void __kmp_resume_if_hard_paused() {
__kmp_pause_status = kmp_not_paused;
}
}
extern void __kmp_hard_pause_reinitialize(const bool in_child_atfork_andler);

extern void __kmp_omp_display_env(int verbose);

Expand Down
2 changes: 2 additions & 0 deletions openmp/runtime/src/kmp_csupport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1165,6 +1165,8 @@ __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
// KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
}
KMP_DEBUG_ASSERT(*lck != NULL);
// save the reverse critical section global lock reference
ilk->rev_ptr_critSec = crit;
}

// Fast-path acquire tas lock
Expand Down
6 changes: 6 additions & 0 deletions openmp/runtime/src/kmp_global.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,12 @@ int __kmp_need_register_atfork =
TRUE; /* At initialization, call pthread_atfork to install fork handler */
int __kmp_need_register_atfork_specified = TRUE;

/* We do not want to register the atfork handler more than once: the prepare
 * handler acquires locks (e.g. the forkjoin lock), so if the same prepare
 * handler were registered — and therefore invoked — multiple times, the
 * second invocation would always deadlock on the already-held lock. */
int __kmp_already_registered_atfork = FALSE;
int __kmp_in_atexit = FALSE; /* Denotes that we are in the atexit handler */

int __kmp_env_stksize = FALSE; /* KMP_STACKSIZE specified? */
int __kmp_env_blocktime = FALSE; /* KMP_BLOCKTIME specified? */
int __kmp_env_checks = FALSE; /* KMP_CHECKS specified? */
Expand Down
6 changes: 6 additions & 0 deletions openmp/runtime/src/kmp_lock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3431,6 +3431,9 @@ void __kmp_cleanup_indirect_user_locks() {
ll));
__kmp_free(ll->lock);
ll->lock = NULL;
// reset the reverse critical section pointer to 0
if (ll->rev_ptr_critSec && !UNLIKELY(__kmp_in_atexit))
memset(ll->rev_ptr_critSec, 0, sizeof(kmp_critical_name));
}
__kmp_indirect_lock_pool[k] = NULL;
}
Expand All @@ -3449,6 +3452,9 @@ void __kmp_cleanup_indirect_user_locks() {
"from table\n",
l));
__kmp_free(l->lock);
// reset the reverse critical section pointer to 0
if (l->rev_ptr_critSec && !UNLIKELY(__kmp_in_atexit))
memset(l->rev_ptr_critSec, 0, sizeof(kmp_critical_name));
}
}
__kmp_free(ptr->table[row]);
Expand Down
11 changes: 11 additions & 0 deletions openmp/runtime/src/kmp_lock.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ extern "C" {
struct ident;
typedef struct ident ident_t;

// moved the typedef kmp_critical_name from kmp.h to here.
typedef kmp_int32 kmp_critical_name[8];

// End of copied code.
// ----------------------------------------------------------------------------

Expand Down Expand Up @@ -1126,6 +1129,14 @@ typedef enum {
typedef struct {
kmp_user_lock_p lock;
kmp_indirect_locktag_t type;
// NOTE: when a `#pragma omp critical` lock gets created, the corresponding
// critical-section global name points at that lock. When the locks are reset
// (via omp_pause_resource_all(omp_pause_hard)), these critical-section global
// lock pointers must also be reset back to NULL (in
// __kmp_cleanup_indirect_user_locks()). However, `rev_ptr_critSec` is not
// reset during the atexit() cleanup handler, since the memory it points to
// is/could be freed already.
kmp_critical_name *rev_ptr_critSec;
} kmp_indirect_lock_t;

// Function tables for direct locks. Set/unset/test differentiate functions
Expand Down
106 changes: 101 additions & 5 deletions openmp/runtime/src/kmp_runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6139,6 +6139,7 @@ void __kmp_internal_end_atexit(void) {
Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there
is nothing. Thus, the workaround is applicable only for Windows static
stat library. */
__kmp_in_atexit = TRUE;
__kmp_internal_end_library(-1);
#if KMP_OS_WINDOWS
__kmp_close_console();
Expand Down Expand Up @@ -6952,9 +6953,9 @@ void __kmp_unregister_library(void) {
value = __kmp_env_get(name);
#endif

KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
// if omp is not initialized and we exit, then we don't need to free anything
if (__kmp_registration_flag != 0 && __kmp_registration_str != NULL) {
if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
// Ok, this is our variable. Delete it.
#if defined(KMP_USE_SHM)
if (__kmp_shm_available) {
Expand All @@ -6967,16 +6968,17 @@ void __kmp_unregister_library(void) {
#else
__kmp_env_unset(name);
#endif
}
}

#if defined(KMP_USE_SHM)
if (shm_name)
KMP_INTERNAL_FREE(shm_name);
if (temp_reg_status_file_name)
KMP_INTERNAL_FREE(temp_reg_status_file_name);
#endif

KMP_INTERNAL_FREE(__kmp_registration_str);
}

KMP_INTERNAL_FREE(value);
KMP_INTERNAL_FREE(name);

Expand Down Expand Up @@ -9055,6 +9057,9 @@ void __kmp_soft_pause() { __kmp_pause_status = kmp_soft_paused; }
void __kmp_hard_pause() {
__kmp_pause_status = kmp_hard_paused;
__kmp_internal_end_thread(-1);
// TODO: we'll do the same thing as child atfork handler, since we need to
// serially initialize the runtime library after __kmp_hard_pause()
__kmp_hard_pause_reinitialize(false);
}

// Soft resume sets __kmp_pause_status, and wakes up all threads.
Expand Down Expand Up @@ -9361,6 +9366,97 @@ void __kmp_set_nesting_mode_threads() {
set__max_active_levels(thread, __kmp_nesting_mode_nlevels);
}

/* Reset the runtime's global state back to "serially initialized" after a
   hard pause (omp_pause_resource_all(omp_pause_hard)) or after fork() in the
   child process.

   @param in_child_atfork_handler  true when called from the child atfork
          handler (__kmp_atfork_child); enables the extra cleanup that is
          only safe/needed there (threadprivate cache zeroing and bootstrap
          lock re-initialization). False when called from __kmp_hard_pause().

   NOTE(review): fixed the misspelled parameter name ("andler" -> "handler");
   the extern declaration may keep a different spelling since C/C++ prototype
   parameter names are not part of the interface. */
void __kmp_hard_pause_reinitialize(const bool in_child_atfork_handler) {
#if KMP_AFFINITY_SUPPORTED
#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY ||     \
    KMP_OS_AIX
  // Reset the affinity in the child to the initial thread's affinity in the
  // parent.
  kmp_set_thread_affinity_mask_initial();
#endif
  // Set default not to bind threads tightly in the child (we're expecting
  // over-subscription after the fork and this can improve things for
  // scripting languages that use OpenMP inside process-parallel code).
  if (__kmp_nested_proc_bind.bind_types != NULL) {
    __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
  }
  // Re-initialize every affinity setting from its environment variable name;
  // the topology/mask objects will be rebuilt lazily on next initialization.
  for (kmp_affinity_t *affinity : __kmp_affinities)
    *affinity = KMP_AFFINITY_INIT(affinity->env_var);
  __kmp_affin_fullMask = nullptr;
  __kmp_affin_origMask = nullptr;
  __kmp_topology = nullptr;
#endif // KMP_AFFINITY_SUPPORTED

  // TODO: resetting these global variables might be not needed if we are not
  // in the child handler, as `__kmp_cleanup()` would have most likely reset
  // them already.

#if KMP_USE_MONITOR
  __kmp_init_monitor = 0;
#endif
  // Drop every initialization level so the next OpenMP call re-initializes
  // the library from scratch.
  __kmp_init_parallel = FALSE;
  __kmp_init_middle = FALSE;
  __kmp_init_serial = FALSE;
  TCW_4(__kmp_init_gtid, FALSE);
  __kmp_init_common = FALSE;

  TCW_4(__kmp_init_user_locks, FALSE);
#if !KMP_USE_DYNAMIC_LOCK
  __kmp_user_lock_table.used = 1;
  __kmp_user_lock_table.allocated = 0;
  __kmp_user_lock_table.table = NULL;
  __kmp_lock_blocks = NULL;
#endif

  // No threads are accounted for anymore.
  __kmp_all_nth = 0;
  TCW_4(__kmp_nth, 0);

  // Empty the thread and team pools.
  __kmp_thread_pool = NULL;
  __kmp_thread_pool_insert_pt = NULL;
  __kmp_team_pool = NULL;

  // Outside the child atfork handler, the threadprivate cache will be cleared
  // in `__kmp_cleanup()` instead.
  if (in_child_atfork_handler) {
    /* Must actually zero all the *cache arguments passed to
       __kmpc_threadprivate here so threadprivate doesn't use stale data */
    KA_TRACE(10, ("__kmp_atfork_child: checking cache address list %p\n",
                  __kmp_threadpriv_cache_list));

    while (__kmp_threadpriv_cache_list != NULL) {

      if (*__kmp_threadpriv_cache_list->addr != NULL) {
        KC_TRACE(50, ("__kmp_atfork_child: zeroing cache at address %p\n",
                      &(*__kmp_threadpriv_cache_list->addr)));

        *__kmp_threadpriv_cache_list->addr = NULL;
      }
      __kmp_threadpriv_cache_list = __kmp_threadpriv_cache_list->next;
    }

    /* reset statically initialized locks */
    __kmp_init_bootstrap_lock(&__kmp_initz_lock);
    __kmp_init_bootstrap_lock(&__kmp_stdio_lock);
    __kmp_init_bootstrap_lock(&__kmp_console_lock);
    __kmp_init_bootstrap_lock(&__kmp_task_team_lock);
  }

#if USE_ITT_BUILD
  __kmp_itt_reset(); // reset ITT's global state
#endif /* USE_ITT_BUILD */

  {
    // Child process often get terminated without any use of OpenMP. That
    // might cause mapped shared memory file to be left unattended. Thus we
    // postpone library registration till middle initialization in the child
    // process.

    // After we do a `__kmpc_pause_resource()`, the omp runtime must also be
    // in serially initialized state in order to not break the assumptions of
    // compiler+runtime implementation.
    __kmp_need_register_serial = FALSE;
    __kmp_serial_initialize();
  }
}

#if ENABLE_LIBOMPTARGET
void (*kmp_target_sync_cb)(ident_t *loc_ref, int gtid, void *current_task,
void *event) = NULL;
Expand Down
83 changes: 5 additions & 78 deletions openmp/runtime/src/z_Linux_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1312,86 +1312,11 @@ static void __kmp_atfork_child(void) {

++__kmp_fork_count;

#if KMP_AFFINITY_SUPPORTED
#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DRAGONFLY || \
KMP_OS_AIX
// reset the affinity in the child to the initial thread
// affinity in the parent
kmp_set_thread_affinity_mask_initial();
#endif
// Set default not to bind threads tightly in the child (we're expecting
// over-subscription after the fork and this can improve things for
// scripting languages that use OpenMP inside process-parallel code).
if (__kmp_nested_proc_bind.bind_types != NULL) {
__kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
}
for (kmp_affinity_t *affinity : __kmp_affinities)
*affinity = KMP_AFFINITY_INIT(affinity->env_var);
__kmp_affin_fullMask = nullptr;
__kmp_affin_origMask = nullptr;
__kmp_topology = nullptr;
#endif // KMP_AFFINITY_SUPPORTED

#if KMP_USE_MONITOR
__kmp_init_monitor = 0;
#endif
__kmp_init_parallel = FALSE;
__kmp_init_middle = FALSE;
__kmp_init_serial = FALSE;
TCW_4(__kmp_init_gtid, FALSE);
__kmp_init_common = FALSE;

TCW_4(__kmp_init_user_locks, FALSE);
#if !KMP_USE_DYNAMIC_LOCK
__kmp_user_lock_table.used = 1;
__kmp_user_lock_table.allocated = 0;
__kmp_user_lock_table.table = NULL;
__kmp_lock_blocks = NULL;
#endif

__kmp_all_nth = 0;
TCW_4(__kmp_nth, 0);

__kmp_thread_pool = NULL;
__kmp_thread_pool_insert_pt = NULL;
__kmp_team_pool = NULL;

/* Must actually zero all the *cache arguments passed to __kmpc_threadprivate
here so threadprivate doesn't use stale data */
KA_TRACE(10, ("__kmp_atfork_child: checking cache address list %p\n",
__kmp_threadpriv_cache_list));

while (__kmp_threadpriv_cache_list != NULL) {

if (*__kmp_threadpriv_cache_list->addr != NULL) {
KC_TRACE(50, ("__kmp_atfork_child: zeroing cache at address %p\n",
&(*__kmp_threadpriv_cache_list->addr)));

*__kmp_threadpriv_cache_list->addr = NULL;
}
__kmp_threadpriv_cache_list = __kmp_threadpriv_cache_list->next;
}
// re-use the same re-initialization code as __kmp_hard_reset()
__kmp_hard_pause_reinitialize(true);

__kmp_init_runtime = FALSE;

/* reset statically initialized locks */
__kmp_init_bootstrap_lock(&__kmp_initz_lock);
__kmp_init_bootstrap_lock(&__kmp_stdio_lock);
__kmp_init_bootstrap_lock(&__kmp_console_lock);
__kmp_init_bootstrap_lock(&__kmp_task_team_lock);

#if USE_ITT_BUILD
__kmp_itt_reset(); // reset ITT's global state
#endif /* USE_ITT_BUILD */

{
// Child process often get terminated without any use of OpenMP. That might
// cause mapped shared memory file to be left unattended. Thus we postpone
// library registration till middle initialization in the child process.
__kmp_need_register_serial = FALSE;
__kmp_serial_initialize();
}

/* This is necessary to make sure no stale data is left around */
/* AC: customers complain that we use unsafe routines in the atfork
handler. Mathworks: dlsym() is unsafe. We call dlsym and dlopen
Expand All @@ -1404,13 +1329,15 @@ static void __kmp_atfork_child(void) {
}

void __kmp_register_atfork(void) {
if (__kmp_need_register_atfork) {
// NOTE: we will not double register our fork handlers! It will cause deadlock
if (!__kmp_already_registered_atfork && __kmp_need_register_atfork) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this change related to the pause_resource_all issue?

Copy link
Author

@haiyanghee haiyanghee Sep 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically yes, because if you see the test function test_KMP_INIT_AT_FORK_with_fork_after_omp_hard_pause_resource_all() I added in the unit tests, the original behaviour will actually register the atfork handlers twice (if the environment variable KMP_INIT_AT_FORK is explicitly set to 1), which causes a deadlock (since the atfork handlers are run twice, and inside the handler it will do locking).

There might be a cleaner way to prevent double atfork registration than adding another flag (I thought I can re-use the variable __kmp_need_register_atfork_specified, but I didn't since it looks like its only used for debug printing)

#if !KMP_OS_WASI
int status = pthread_atfork(__kmp_atfork_prepare, __kmp_atfork_parent,
__kmp_atfork_child);
KMP_CHECK_SYSFAIL("pthread_atfork", status);
#endif
__kmp_need_register_atfork = FALSE;
__kmp_already_registered_atfork = TRUE;
}
}

Expand Down
Loading