Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions openmp/runtime/src/kmp.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,10 @@ class kmp_stats_list;
#define UNLIKELY(x) (x)
#endif

#ifndef LIKELY
#define LIKELY(x) (x)
#endif

// Affinity format function
#include "kmp_str.h"

Expand Down Expand Up @@ -1759,8 +1763,6 @@ typedef int kmp_itt_mark_t;
#define KMP_ITT_DEBUG 0
#endif /* USE_ITT_BUILD */

typedef kmp_int32 kmp_critical_name[8];

/*!
@ingroup PARALLEL
The type for a microtask which gets passed to @ref __kmpc_fork_call().
Expand Down Expand Up @@ -3510,6 +3512,8 @@ extern int __kmp_abort_delay;
extern int __kmp_need_register_atfork_specified;
extern int __kmp_need_register_atfork; /* At initialization, call pthread_atfork
to install fork handler */
extern int __kmp_already_registered_atfork; /* Do not register atfork twice */
extern int __kmp_in_atexit; /*Denote that we are in the atexit handler*/
extern int __kmp_gtid_mode; /* Method of getting gtid, values:
0 - not set, will be set at runtime
1 - using stack search
Expand Down
7 changes: 7 additions & 0 deletions openmp/runtime/src/kmp_csupport.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,11 @@ void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
kmp_int32 num_threads) {
KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
global_tid, num_threads));
// we'll do middle initialize first, as otherwise the assert on global_tid can
// fail when omp is not initialized and this function is called
if (!TCR_4(__kmp_init_middle)) {
__kmp_middle_initialize();
}
__kmp_assert_valid_gtid(global_tid);
__kmp_push_num_threads(loc, global_tid, num_threads);
}
Expand Down Expand Up @@ -1165,6 +1170,8 @@ __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
// KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
}
KMP_DEBUG_ASSERT(*lck != NULL);
// save the reverse critical section global lock reference
ilk->rev_ptr_critSec = crit;
}

// Fast-path acquire tas lock
Expand Down
6 changes: 6 additions & 0 deletions openmp/runtime/src/kmp_global.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,12 @@ int __kmp_need_register_atfork =
TRUE; /* At initialization, call pthread_atfork to install fork handler */
int __kmp_need_register_atfork_specified = TRUE;

/* We do not want to repeatedly register the atfork handler, because since we
* lock things (in __kmp_forkjoin_lock()) in the prepare handler, if the same
* prepare handler gets called multiple times, then it will always deadlock */
int __kmp_already_registered_atfork = FALSE;
int __kmp_in_atexit = FALSE; /*Denote that we are in the atexit handler*/

int __kmp_env_stksize = FALSE; /* KMP_STACKSIZE specified? */
int __kmp_env_blocktime = FALSE; /* KMP_BLOCKTIME specified? */
int __kmp_env_checks = FALSE; /* KMP_CHECKS specified? */
Expand Down
6 changes: 6 additions & 0 deletions openmp/runtime/src/kmp_lock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3431,6 +3431,9 @@ void __kmp_cleanup_indirect_user_locks() {
ll));
__kmp_free(ll->lock);
ll->lock = NULL;
// reset the reverse critical section pointer to 0
if (ll->rev_ptr_critSec && LIKELY(!__kmp_in_atexit))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rev_ptr_critSec is not initialized. Set it to nullptr in __kmp_allocate_indirect_lock

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In __kmp_allocate_indirect_lock() the new lock tables are allocated with multiples of sizeof(kmp_indirect_lock_t), and I believe __kmp_allocate() will memset the allocated memory to 0. So I believe rev_ptr_critSec is technically initialized to 0 in this case?

I can also explicitly initialize it to nullptr if that makes the code easier to reason about.

memset(ll->rev_ptr_critSec, 0, sizeof(kmp_critical_name));
}
__kmp_indirect_lock_pool[k] = NULL;
}
Expand All @@ -3449,6 +3452,9 @@ void __kmp_cleanup_indirect_user_locks() {
"from table\n",
l));
__kmp_free(l->lock);
// reset the reverse critical section pointer to 0
if (l->rev_ptr_critSec && LIKELY(!__kmp_in_atexit))
memset(l->rev_ptr_critSec, 0, sizeof(kmp_critical_name));
}
}
__kmp_free(ptr->table[row]);
Expand Down
11 changes: 11 additions & 0 deletions openmp/runtime/src/kmp_lock.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ extern "C" {
struct ident;
typedef struct ident ident_t;

// moved the typedef kmp_critical_name from kmp.h to here.
typedef kmp_int32 kmp_critical_name[8];

// End of copied code.
// ----------------------------------------------------------------------------

Expand Down Expand Up @@ -1126,6 +1129,14 @@ typedef enum {
typedef struct {
kmp_user_lock_p lock;
kmp_indirect_locktag_t type;
// NOTE: when a `#pragma omp critical` lock gets created, the corresponding
// critical section global locks needs to point to a lock when we reset the
// locks (via omp_pause_resource_all(omp_pause_hard)), these critical section
// global lock pointers need to also be reset back to NULL (in
// __kmp_cleanup_indirect_user_locks()) however, we will not reset the
// `rev_ptr_critSec` lock during the atexit() cleanup handler, since the
// memory of `rev_ptr_critSec` is/could be freed already
kmp_critical_name *rev_ptr_critSec;
} kmp_indirect_lock_t;

// Function tables for direct locks. Set/unset/test differentiate functions
Expand Down
16 changes: 11 additions & 5 deletions openmp/runtime/src/kmp_runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6139,6 +6139,7 @@ void __kmp_internal_end_atexit(void) {
Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there
is nothing. Thus, the workaround is applicable only for Windows static
stat library. */
__kmp_in_atexit = TRUE;
__kmp_internal_end_library(-1);
#if KMP_OS_WINDOWS
__kmp_close_console();
Expand Down Expand Up @@ -6952,9 +6953,9 @@ void __kmp_unregister_library(void) {
value = __kmp_env_get(name);
#endif

KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
// if omp is not initialized and we exit, then we don't need to free anything
if (__kmp_registration_flag != 0 && __kmp_registration_str != NULL) {
if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
// Ok, this is our variable. Delete it.
#if defined(KMP_USE_SHM)
if (__kmp_shm_available) {
Expand All @@ -6967,16 +6968,17 @@ void __kmp_unregister_library(void) {
#else
__kmp_env_unset(name);
#endif
}
}

#if defined(KMP_USE_SHM)
if (shm_name)
KMP_INTERNAL_FREE(shm_name);
if (temp_reg_status_file_name)
KMP_INTERNAL_FREE(temp_reg_status_file_name);
#endif

KMP_INTERNAL_FREE(__kmp_registration_str);
}

KMP_INTERNAL_FREE(value);
KMP_INTERNAL_FREE(name);

Expand Down Expand Up @@ -8343,6 +8345,10 @@ void __kmp_cleanup(void) {

__kmpc_destroy_allocator(KMP_GTID_SHUTDOWN, __kmp_def_allocator);
__kmp_def_allocator = omp_default_mem_alloc;
#ifdef KMP_TDATA_GTID
/*reset __kmp_gtid to initial value*/
__kmp_gtid = KMP_GTID_DNE;
#endif
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thread 0 should not drop it's gtid

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How do I know we are thread 0? Do I just check if __kmp_gtid == 0 (as __kmp_gtid is declared as thread local storage) and if thats the case, I don't reset it?

I chose to reset __kmp_gtid to KMP_GTID_DNE because it is initialized to KMP_GTID_DNE in kmp_global.cpp, so I thought resetting to its default value was ok.

Can you please point me how this can cause an issue in the code, so that I can understand the code base better?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When you look at code generated for an OpenMP program (IR or disassembled objdump), you will see that each function that contains __kmpc* calls initially gets the gtid using __kmpc_global_thread_num, which triggers serial initialization. This gtid is then passed to many __kmpc calls. You already ran into the issue that __kmpc_push_num_threads assumes that serial initialization has happened. I think, that the runtime must be in serial initialized state after omp_pause_resource[_all] to not break the assumptions of compiler+runtime implementation.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right I did realize clang inserts __kmpc_global_thread_num() at the beginning of functions.

But how does not resetting __kmp_gtid to KMP_GTID_DNE for thread 0 imply that the runtime in serial initialized state after omp_pause_resource calls? It sounds like we want to call __kmpc_global_thread_num() (or something equivalent) right after omp_pause_resource to force serial initialized state?


KA_TRACE(10, ("__kmp_cleanup: exit\n"));
}
Expand Down
2 changes: 2 additions & 0 deletions openmp/runtime/src/ompt-internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,11 @@ extern ompt_callbacks_active_t ompt_enabled;

#if KMP_OS_WINDOWS
#define UNLIKELY(x) (x)
#define LIKELY(x) (x)
#define OMPT_NOINLINE __declspec(noinline)
#else
#define UNLIKELY(x) __builtin_expect(!!(x), 0)
#define LIKELY(x) __builtin_expect(!!(x), 1)
#define OMPT_NOINLINE __attribute__((noinline))
#endif

Expand Down
4 changes: 3 additions & 1 deletion openmp/runtime/src/z_Linux_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1404,13 +1404,15 @@ static void __kmp_atfork_child(void) {
}

void __kmp_register_atfork(void) {
if (__kmp_need_register_atfork) {
// NOTE: we will not double register our fork handlers! It will cause deadlock
if (!__kmp_already_registered_atfork && __kmp_need_register_atfork) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this change related to the pause_resource_all issue?

Copy link
Author

@haiyanghee haiyanghee Sep 4, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Technically yes, because if you see the test function test_KMP_INIT_AT_FORK_with_fork_after_omp_hard_pause_resource_all() I added in the unit tests, the original behaviour will actually register the atfork handlers twice (if the environment variable KMP_INIT_AT_FORK is explicitly set to 1), which causes a deadlock (since the atfork handlers are run twice, and inside the handler it will do locking).

There might be a cleaner way to prevent double atfork registration than adding another flag (I thought I can re-use the variable __kmp_need_register_atfork_specified, but I didn't since it looks like its only used for debug printing)

#if !KMP_OS_WASI
int status = pthread_atfork(__kmp_atfork_prepare, __kmp_atfork_parent,
__kmp_atfork_child);
KMP_CHECK_SYSFAIL("pthread_atfork", status);
#endif
__kmp_need_register_atfork = FALSE;
__kmp_already_registered_atfork = TRUE;
}
}

Expand Down
121 changes: 121 additions & 0 deletions openmp/runtime/test/api/omp_pause_resource.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,124 @@
// UNSUPPORTED: icc-18, icc-19

#include <stdio.h>
#include <assert.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/wait.h>
#include "omp_testsuite.h"

#define NUM_THREADS 3

void doOmpWorkWithCritical(int *a_lockCtr, int *b_lockCtr) {
#pragma omp parallel num_threads(NUM_THREADS)
{
#pragma omp critical(a_lock)
{ *a_lockCtr = *a_lockCtr + 1; }
#pragma omp critical(b_lock)
{ *b_lockCtr = *b_lockCtr + 1; }
}
}

void test_omp_critical_after_omp_hard_pause_resource_all() {
int a_lockCtr = 0, b_lockCtr = 0;

// use omp to do some work
doOmpWorkWithCritical(&a_lockCtr, &b_lockCtr);
assert(a_lockCtr == NUM_THREADS && b_lockCtr == NUM_THREADS);
a_lockCtr = b_lockCtr = 0; // reset the counters

// omp hard pause should succeed
int rc = omp_pause_resource_all(omp_pause_hard);
assert(rc == 0);

// we should not segfault inside the critical sections of doOmpWork()
doOmpWorkWithCritical(&a_lockCtr, &b_lockCtr);
assert(a_lockCtr == NUM_THREADS && b_lockCtr == NUM_THREADS);
}

void test_omp_get_thread_num_after_omp_hard_pause_resource_all() {
// omp_get_thread_num() should work, even if omp is not yet initialized
int n = omp_get_thread_num();
// called from serial region, omp_get_thread_num() should return 0
assert(n == 0);

// use omp to do some work, guarantees omp initialization
#pragma omp parallel num_threads(NUM_THREADS)
{}

// omp hard pause should succeed
int rc = omp_pause_resource_all(omp_pause_hard);
assert(rc == 0);

// omp_get_thread_num() should work again with no segfault
n = omp_get_thread_num();
// called from serial region, omp_get_thread_num() should return 0
assert(n == 0);
}

void test_omp_parallel_num_threads_after_omp_hard_pause_resource_all() {
// use omp to do some work
#pragma omp parallel num_threads(NUM_THREADS)
{}

// omp hard pause should succeed
int rc = omp_pause_resource_all(omp_pause_hard);
assert(rc == 0);

// this should not trigger any omp asserts
#pragma omp parallel num_threads(NUM_THREADS)
{}
}

void test_KMP_INIT_AT_FORK_with_fork_after_omp_hard_pause_resource_all() {
// explicitly set the KMP_INIT_AT_FORK environment variable to 1
setenv("KMP_INIT_AT_FORK", "1", 1);

// use omp to do some work
#pragma omp parallel for num_threads(NUM_THREADS)
for (int i = 0; i < NUM_THREADS; ++i) {
}

// omp hard pause should succeed
int rc = omp_pause_resource_all(omp_pause_hard);
assert(rc == 0);

// use omp to do some work
#pragma omp parallel for num_threads(NUM_THREADS)
for (int i = 0; i < NUM_THREADS; ++i) {
}

// we'll fork .. this shouldn't deadlock
int p = fork();

if (!p) {
exit(0); // child simply does nothing and exits
}

waitpid(p, NULL, 0);

unsetenv("KMP_INIT_AT_FORK");
}

void test_fork_child_exiting_after_omp_hard_pause_resource_all() {
// use omp to do some work
#pragma omp parallel num_threads(NUM_THREADS)
{}

// omp hard pause should succeed
int rc = omp_pause_resource_all(omp_pause_hard);
assert(rc == 0);

int p = fork();

if (!p) {
// child should be able to exit properly without assert failures
exit(0);
}

waitpid(p, NULL, 0);
}

int test_omp_pause_resource() {
int fails, nthreads, my_dev;

Expand Down Expand Up @@ -57,6 +173,11 @@ int main() {
if (!test_omp_pause_resource()) {
num_failed++;
}
test_omp_critical_after_omp_hard_pause_resource_all();
test_omp_get_thread_num_after_omp_hard_pause_resource_all();
test_omp_parallel_num_threads_after_omp_hard_pause_resource_all();
test_KMP_INIT_AT_FORK_with_fork_after_omp_hard_pause_resource_all();
test_fork_child_exiting_after_omp_hard_pause_resource_all();
}
return num_failed;
}
Loading