Skip to content

Commit 4ea80bc

Browse files
committed
Updates, see llvm#82274
1 parent 9fc9269 commit 4ea80bc

File tree

6 files changed

+58
-30
lines changed

6 files changed

+58
-30
lines changed

openmp/runtime/src/kmp.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,12 @@ class kmp_stats_list;
117117
#include <xmmintrin.h>
118118
#endif
119119

120+
// Compile in a global counter of in-flight tasks so that task creation can be
121+
// throttled once more than 'KMP_TASK_MAXIMUM' (env. var.) tasks are in flight
122+
#ifndef KMP_COMPILE_GLOBAL_TASK_THROTTLING
123+
# define KMP_COMPILE_GLOBAL_TASK_THROTTLING 0
124+
#endif
125+
120126
// The below has to be defined before including "kmp_barrier.h".
121127
#define KMP_INTERNAL_MALLOC(sz) malloc(sz)
122128
#define KMP_INTERNAL_FREE(p) free(p)
@@ -2422,9 +2428,12 @@ typedef enum kmp_tasking_mode {
24222428
extern kmp_tasking_mode_t
24232429
__kmp_tasking_mode; /* determines how/when to execute tasks */
24242430
extern int __kmp_task_stealing_constraint;
2431+
2432+
#if KMP_COMPILE_GLOBAL_TASK_THROTTLING
24252433
extern std::atomic<kmp_int32> __kmp_n_tasks_in_flight;
2426-
extern int __kmp_enable_task_throttling;
24272434
extern kmp_int32 __kmp_task_maximum;
2435+
#endif /* KMP_COMPILE_GLOBAL_TASK_THROTTLING */
2436+
extern int __kmp_enable_task_throttling;
24282437
extern kmp_int32 __kmp_task_maximum_ready_per_thread;
24292438

24302439
extern kmp_int32 __kmp_default_device; // Set via OMP_DEFAULT_DEVICE if

openmp/runtime/src/kmp_global.cpp

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -354,18 +354,21 @@ KMP_BUILD_ASSERT(sizeof(kmp_tasking_flags_t) == 4);
354354

355355
int __kmp_task_stealing_constraint = 1; /* Constrain task stealing by default */
356356

357-
std::atomic<kmp_int32> __kmp_n_tasks_in_flight = 0; /* n° of tasks in flight */
357+
/* Serialize tasks once a threshold is reached, such as the number of ready
358+
* tasks or the total number of tasks in flight */
359+
kmp_int32 __kmp_enable_task_throttling = 1;
358360

359-
kmp_int32 __kmp_enable_task_throttling = 1; /* Serialize tasks once a threshold
360-
is reached, such as the number of
361-
ready tasks or the total number of
362-
tasks */
361+
/* number of ready tasks in a thread queue before it starts serializing */
362+
kmp_int32 __kmp_task_maximum_ready_per_thread = INITIAL_TASK_DEQUE_SIZE;
363363

364-
kmp_int32 __kmp_task_maximum = 65536; /* number of tasks threshold before
365-
serializing */
364+
#if KMP_COMPILE_GLOBAL_TASK_THROTTLING
365+
/* number of tasks in flight */
366+
std::atomic<kmp_int32> __kmp_n_tasks_in_flight = 0;
367+
368+
/* maximum number of tasks in flight before serializing */
369+
kmp_int32 __kmp_task_maximum = 65536;
370+
#endif /* KMP_COMPILE_GLOBAL_TASK_THROTTLING */
366371

367-
kmp_int32 __kmp_task_maximum_ready_per_thread = 256; /* number of ready tasks
368-
before serializing */
369372
#ifdef DEBUG_SUSPEND
370373
int __kmp_suspend_count = 0;
371374
#endif

openmp/runtime/src/kmp_settings.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5364,12 +5364,20 @@ static void __kmp_stg_print_task_throttling(kmp_str_buf_t *buffer,
53645364
// KMP_TASK_MAXIMUM
53655365
// Parse the KMP_TASK_MAXIMUM setting into __kmp_task_maximum.
//   name  - name of the setting being parsed (forwarded to the int parser)
//   value - textual value supplied by the user
//   data  - unused
// Accepted range is [1, INT_MAX], as passed to __kmp_stg_parse_int.
// The guard must be KMP_COMPILE_GLOBAL_TASK_THROTTLING: that is the macro
// defined in kmp.h and the one under which __kmp_task_maximum is declared.
// Testing any other (undefined) macro evaluates to 0 and silently disables
// parsing even when the feature is compiled in.
static void __kmp_stg_parse_task_maximum(char const *name, char const *value,
53665366
void *data) {
5367+
#if KMP_COMPILE_GLOBAL_TASK_THROTTLING
53675368
__kmp_stg_parse_int(name, value, 1, INT_MAX, &__kmp_task_maximum);
5369+
#else /* KMP_COMPILE_GLOBAL_TASK_THROTTLING */
5370+
// TODO: warn the user that the runtime was built without global task
// throttling support, so KMP_TASK_MAXIMUM is ignored
5371+
#endif /* KMP_COMPILE_GLOBAL_TASK_THROTTLING */
53685372
} // __kmp_stg_parse_task_maximum
53695373

53705374
// Print the KMP_TASK_MAXIMUM setting into `buffer`.
//   buffer - destination string buffer
//   name   - name of the setting being printed
//   data   - unused
// When global task throttling is compiled in, the current value of
// __kmp_task_maximum is printed; otherwise -1 is printed as a placeholder.
// The guard must be KMP_COMPILE_GLOBAL_TASK_THROTTLING: that is the macro
// defined in kmp.h and the one under which __kmp_task_maximum is declared.
// Testing any other (undefined) macro evaluates to 0, so -1 would always be
// printed.
static void __kmp_stg_print_task_maximum(kmp_str_buf_t *buffer,
53715375
char const *name, void *data) {
5376+
#if KMP_COMPILE_GLOBAL_TASK_THROTTLING
53725377
__kmp_stg_print_int(buffer, name, __kmp_task_maximum);
5378+
#else /* KMP_COMPILE_GLOBAL_TASK_THROTTLING */
5379+
__kmp_stg_print_int(buffer, name, -1);
5380+
#endif /* KMP_COMPILE_GLOBAL_TASK_THROTTLING */
53735381
} // __kmp_stg_print_task_maximum
53745382

53755383
// -----------------------------------------------------------------------------

openmp/runtime/src/kmp_tasking.cpp

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -438,9 +438,10 @@ static kmp_int32 __kmp_push_priority_task(kmp_int32 gtid, kmp_info_t *thread,
438438

439439
__kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
440440
// Check if deque is full
441-
if (__kmp_enable_task_throttling && TCR_4(thread_data->td.td_deque_ntasks) >=
442-
__kmp_task_maximum_ready_per_thread) {
443-
if (__kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
441+
if (TCR_4(thread_data->td.td_deque_ntasks) >=
442+
TASK_DEQUE_SIZE(thread_data->td)) {
443+
if (__kmp_enable_task_throttling &&
444+
__kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
444445
thread->th.th_current_task)) {
445446
__kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
446447
KA_TRACE(20, ("__kmp_push_priority_task: T#%d deque is full; returning "
@@ -541,15 +542,15 @@ static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
541542
}
542543

543544
int locked = 0;
544-
// Check if deque is full
545+
// Check if deque is full and needs to be expanded
545546
int requires_resize = TCR_4(thread_data->td.td_deque_ntasks) >=
546547
TASK_DEQUE_SIZE(thread_data->td);
548+
// Check if deque has too many tasks and needs throttling
547549
int requires_throttling =
548550
__kmp_enable_task_throttling && TCR_4(thread_data->td.td_deque_ntasks) >=
549551
__kmp_task_maximum_ready_per_thread;
550-
int thread_can_execute;
551552
if (requires_resize || requires_throttling) {
552-
thread_can_execute =
553+
int thread_can_execute =
553554
__kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
554555
thread->th.th_current_task);
555556
if (requires_throttling && thread_can_execute) {
@@ -577,7 +578,7 @@ static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
577578
TCR_4(thread_data->td.td_deque_ntasks) >=
578579
__kmp_task_maximum_ready_per_thread;
579580
if (requires_resize || requires_throttling) {
580-
thread_can_execute =
581+
int thread_can_execute =
581582
__kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
582583
thread->th.th_current_task);
583584
if (requires_throttling && thread_can_execute) {
@@ -592,6 +593,7 @@ static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
592593
}
593594
}
594595
}
596+
595597
// Must have room since no thread can add tasks but calling thread
596598
KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) <
597599
TASK_DEQUE_SIZE(thread_data->td));
@@ -924,7 +926,10 @@ static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata,
924926
#else /* ! USE_FAST_MEMORY */
925927
__kmp_thread_free(thread, taskdata);
926928
#endif
927-
--__kmp_n_tasks_in_flight;
929+
#if KMP_COMPILE_GLOBAL_TASK_THROTTLING
930+
if (__kmp_enable_task_throttling)
931+
--__kmp_n_tasks_in_flight;
932+
#endif /* KMP_COMPILE_GLOBAL_TASK_THROTTLING */
928933
#if OMPX_TASKGRAPH
929934
} else {
930935
taskdata->td_flags.complete = 0;
@@ -1475,10 +1480,15 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
14751480
if (UNLIKELY(!TCR_4(__kmp_init_middle)))
14761481
__kmp_middle_initialize();
14771482

1483+
#if KMP_COMPILE_GLOBAL_TASK_THROTTLING
14781484
// task throttling: too many tasks co-existing, emptying queue now
14791485
if (__kmp_enable_task_throttling)
1486+
{
14801487
while (TCR_4(__kmp_n_tasks_in_flight.load()) >= __kmp_task_maximum)
14811488
__kmpc_omp_taskyield(NULL, gtid, 0);
1489+
++__kmp_n_tasks_in_flight;
1490+
}
1491+
#endif /* KMP_COMPILE_GLOBAL_TASK_THROTTLING */
14821492

14831493
if (flags->hidden_helper) {
14841494
if (__kmp_enable_hidden_helper) {
@@ -1574,7 +1584,6 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
15741584
taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset +
15751585
sizeof_shareds);
15761586
#endif /* USE_FAST_MEMORY */
1577-
++__kmp_n_tasks_in_flight;
15781587

15791588
task = KMP_TASKDATA_TO_TASK(taskdata);
15801589

openmp/runtime/test/tasking/omp_throttling_max.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
// clang-format off
2-
// RUN: %libomp-compile && env OMP_NUM_THREADS=2 KMP_ENABLE_TASK_THROTTLING=1 KMP_TASK_MAXIMUM=0 %libomp-run
3-
// RUN: %libomp-compile && env OMP_NUM_THREADS=2 KMP_ENABLE_TASK_THROTTLING=1 KMP_TASK_MAXIMUM=1 %libomp-run
4-
// RUN: %libomp-compile && env OMP_NUM_THREADS=2 KMP_ENABLE_TASK_THROTTLING=1 KMP_TASK_MAXIMUM=256 %libomp-run
5-
// RUN: %libomp-compile && env OMP_NUM_THREADS=2 KMP_ENABLE_TASK_THROTTLING=1 KMP_TASK_MAXIMUM=65536 %libomp-run
6-
// RUN: %libomp-compile && env OMP_NUM_THREADS=2 KMP_ENABLE_TASK_THROTTLING=1 KMP_TASK_MAXIMUM=100000 %libomp-run
2+
// RUN: %libomp-compile && env OMP_NUM_THREADS=2 KMP_ENABLE_TASK_THROTTLING=1 KMP_TASK_MAXIMUM_READY_PER_THREAD=999999 KMP_TASK_MAXIMUM=0 %libomp-run
3+
// RUN: %libomp-compile && env OMP_NUM_THREADS=2 KMP_ENABLE_TASK_THROTTLING=1 KMP_TASK_MAXIMUM_READY_PER_THREAD=999999 KMP_TASK_MAXIMUM=1 %libomp-run
4+
// RUN: %libomp-compile && env OMP_NUM_THREADS=2 KMP_ENABLE_TASK_THROTTLING=1 KMP_TASK_MAXIMUM_READY_PER_THREAD=999999 KMP_TASK_MAXIMUM=256 %libomp-run
5+
// RUN: %libomp-compile && env OMP_NUM_THREADS=2 KMP_ENABLE_TASK_THROTTLING=1 KMP_TASK_MAXIMUM_READY_PER_THREAD=999999 KMP_TASK_MAXIMUM=65536 %libomp-run
6+
// RUN: %libomp-compile && env OMP_NUM_THREADS=2 KMP_ENABLE_TASK_THROTTLING=1 KMP_TASK_MAXIMUM_READY_PER_THREAD=999999 KMP_TASK_MAXIMUM=100000 %libomp-run
77
// clang-format on
88

99
/**

openmp/runtime/test/tasking/omp_throttling_max_ready_per_thread.c

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
// clang-format off
2-
// RUN: %libomp-compile && env OMP_NUM_THREADS=2 KMP_ENABLE_TASK_THROTTLING=1 KMP_TASK_MAXIMUM_READY_PER_THREAD=0 %libomp-run
3-
// RUN: %libomp-compile && env OMP_NUM_THREADS=2 KMP_ENABLE_TASK_THROTTLING=1 KMP_TASK_MAXIMUM_READY_PER_THREAD=1 %libomp-run
4-
// RUN: %libomp-compile && env OMP_NUM_THREADS=2 KMP_ENABLE_TASK_THROTTLING=1 KMP_TASK_MAXIMUM_READY_PER_THREAD=256 %libomp-run
5-
// RUN: %libomp-compile && env OMP_NUM_THREADS=2 KMP_ENABLE_TASK_THROTTLING=1 KMP_TASK_MAXIMUM_READY_PER_THREAD=65536 %libomp-run
6-
// RUN: %libomp-compile && env OMP_NUM_THREADS=2 KMP_ENABLE_TASK_THROTTLING=1 KMP_TASK_MAXIMUM_READY_PER_THREAD=100000 %libomp-run
2+
// RUN: %libomp-compile && env OMP_NUM_THREADS=2 KMP_ENABLE_TASK_THROTTLING=1 KMP_TASK_MAXIMUM=999999 KMP_TASK_MAXIMUM_READY_PER_THREAD=0 %libomp-run
3+
// RUN: %libomp-compile && env OMP_NUM_THREADS=2 KMP_ENABLE_TASK_THROTTLING=1 KMP_TASK_MAXIMUM=999999 KMP_TASK_MAXIMUM_READY_PER_THREAD=1 %libomp-run
4+
// RUN: %libomp-compile && env OMP_NUM_THREADS=2 KMP_ENABLE_TASK_THROTTLING=1 KMP_TASK_MAXIMUM=999999 KMP_TASK_MAXIMUM_READY_PER_THREAD=256 %libomp-run
5+
// RUN: %libomp-compile && env OMP_NUM_THREADS=2 KMP_ENABLE_TASK_THROTTLING=1 KMP_TASK_MAXIMUM=999999 KMP_TASK_MAXIMUM_READY_PER_THREAD=65536 %libomp-run
6+
// RUN: %libomp-compile && env OMP_NUM_THREADS=2 KMP_ENABLE_TASK_THROTTLING=1 KMP_TASK_MAXIMUM=999999 KMP_TASK_MAXIMUM_READY_PER_THREAD=100000 %libomp-run
77
// clang-format on
88

99
/**
@@ -22,7 +22,6 @@
2222
#include <assert.h>
2323
#include <omp.h>
2424
#include <stdlib.h>
25-
#include <stdio.h>
2625

2726
#define MAX_TASKS_READY_DEFAULT (1 << 8)
2827

0 commit comments

Comments
 (0)