@@ -438,10 +438,9 @@ static kmp_int32 __kmp_push_priority_task(kmp_int32 gtid, kmp_info_t *thread,
438438
439439 __kmp_acquire_bootstrap_lock (&thread_data->td .td_deque_lock );
440440 // Check if deque is full
441- if (TCR_4 (thread_data->td .td_deque_ntasks ) >=
442- TASK_DEQUE_SIZE (thread_data->td )) {
443- if (__kmp_enable_task_throttling &&
444- __kmp_task_is_allowed (gtid, __kmp_task_stealing_constraint, taskdata,
441+ if (__kmp_enable_task_throttling && TCR_4 (thread_data->td .td_deque_ntasks ) >=
442+ __kmp_task_maximum_ready_per_thread) {
443+ if (__kmp_task_is_allowed (gtid, __kmp_task_stealing_constraint, taskdata,
445444 thread->th .th_current_task )) {
446445 __kmp_release_bootstrap_lock (&thread_data->td .td_deque_lock );
447446 KA_TRACE (20 , (" __kmp_push_priority_task: T#%d deque is full; returning "
@@ -543,40 +542,51 @@ static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
543542
544543 int locked = 0 ;
545544 // Check if deque is full
546- if (TCR_4 (thread_data->td .td_deque_ntasks ) >=
547- TASK_DEQUE_SIZE (thread_data->td )) {
548- if (__kmp_enable_task_throttling &&
545+ int requires_resize = TCR_4 (thread_data->td .td_deque_ntasks ) >=
546+ TASK_DEQUE_SIZE (thread_data->td );
547+ int requires_throttling =
548+ __kmp_enable_task_throttling && TCR_4 (thread_data->td .td_deque_ntasks ) >=
549+ __kmp_task_maximum_ready_per_thread;
550+ int thread_can_execute;
551+ if (requires_resize || requires_throttling) {
552+ thread_can_execute =
549553 __kmp_task_is_allowed (gtid, __kmp_task_stealing_constraint, taskdata,
550- thread->th .th_current_task )) {
554+ thread->th .th_current_task );
555+ if (requires_throttling && thread_can_execute) {
551556 KA_TRACE (20 , (" __kmp_push_task: T#%d deque is full; returning "
552557 " TASK_NOT_PUSHED for task %p\n " ,
553558 gtid, taskdata));
554559 return TASK_NOT_PUSHED;
555- } else {
560+ } else { /* maybe requires_resize */
556561 __kmp_acquire_bootstrap_lock (&thread_data->td .td_deque_lock );
557562 locked = 1 ;
558- if (TCR_4 (thread_data->td .td_deque_ntasks ) >=
559- TASK_DEQUE_SIZE (thread_data->td )) {
560- // expand deque to push the task which is not allowed to execute
563+ requires_resize = TCR_4 (thread_data->td .td_deque_ntasks ) >=
564+ TASK_DEQUE_SIZE (thread_data->td );
565+ // expand deque to push the task which is not allowed to execute
566+ if (requires_resize)
561567 __kmp_realloc_task_deque (thread, thread_data);
562- }
563568 }
564569 }
565570 // Lock the deque for the task push operation
566571 if (!locked) {
567572 __kmp_acquire_bootstrap_lock (&thread_data->td .td_deque_lock );
568573 // Need to recheck as we can get a proxy task from thread outside of OpenMP
569- if (TCR_4 (thread_data->td .td_deque_ntasks ) >=
570- TASK_DEQUE_SIZE (thread_data->td )) {
571- if (__kmp_enable_task_throttling &&
574+ requires_resize = TCR_4 (thread_data->td .td_deque_ntasks ) >=
575+ TASK_DEQUE_SIZE (thread_data->td );
576+ requires_throttling = __kmp_enable_task_throttling &&
577+ TCR_4 (thread_data->td .td_deque_ntasks ) >=
578+ __kmp_task_maximum_ready_per_thread;
579+ if (requires_resize || requires_throttling) {
580+ thread_can_execute =
572581 __kmp_task_is_allowed (gtid, __kmp_task_stealing_constraint, taskdata,
573- thread->th .th_current_task )) {
582+ thread->th .th_current_task );
583+ if (requires_throttling && thread_can_execute) {
574584 __kmp_release_bootstrap_lock (&thread_data->td .td_deque_lock );
575585 KA_TRACE (20 , (" __kmp_push_task: T#%d deque is full on 2nd check; "
576586 " returning TASK_NOT_PUSHED for task %p\n " ,
577587 gtid, taskdata));
578588 return TASK_NOT_PUSHED;
579- } else {
589+ } else { /* requires_resize */
580590 // expand deque to push the task which is not allowed to execute
581591 __kmp_realloc_task_deque (thread, thread_data);
582592 }
@@ -914,6 +924,7 @@ static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata,
914924#else /* ! USE_FAST_MEMORY */
915925 __kmp_thread_free (thread, taskdata);
916926#endif
927+ --__kmp_n_tasks_in_flight;
917928#if OMPX_TASKGRAPH
918929 } else {
919930 taskdata->td_flags .complete = 0 ;
@@ -1464,6 +1475,11 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
14641475 if (UNLIKELY (!TCR_4 (__kmp_init_middle)))
14651476 __kmp_middle_initialize ();
14661477
1478+ // task throttling: too many tasks co-existing, emptying queue now
1479+ if (__kmp_enable_task_throttling)
1480+ while (TCR_4 (__kmp_n_tasks_in_flight.load ()) >= __kmp_task_maximum)
1481+ __kmpc_omp_taskyield (NULL , gtid, 0 );
1482+
14671483 if (flags->hidden_helper ) {
14681484 if (__kmp_enable_hidden_helper) {
14691485 if (!TCR_4 (__kmp_init_hidden_helper))
@@ -1558,6 +1574,7 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
15581574 taskdata = (kmp_taskdata_t *)__kmp_thread_malloc (thread, shareds_offset +
15591575 sizeof_shareds);
15601576#endif /* USE_FAST_MEMORY */
1577+ ++__kmp_n_tasks_in_flight;
15611578
15621579 task = KMP_TASKDATA_TO_TASK (taskdata);
15631580
0 commit comments