diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index 83afc0e83f231..6b28b504392a3 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -2649,6 +2649,14 @@ typedef struct kmp_node_info {
   kmp_taskdata_t *parent_task; // Parent implicit task
 } kmp_node_info_t;
 
+// Block-based vector of recorded TDG nodes
+typedef struct kmp_node_vector {
+  kmp_node_info_t **blocks;
+  kmp_int32 block_size;
+  std::atomic<kmp_int32> num_of_blocks;
+  kmp_bootstrap_lock_t lock;
+} kmp_node_vector_t;
+
 /// Represent a TDG's current status
 typedef enum kmp_tdg_status {
   KMP_TDG_NONE = 0,
@@ -2660,15 +2668,14 @@ typedef enum kmp_tdg_status {
 typedef struct kmp_tdg_info {
   kmp_int32 tdg_id; // Unique idenfifier of the TDG
   kmp_taskgraph_flags_t tdg_flags; // Flags related to a TDG
-  kmp_int32 map_size; // Number of allocated TDG nodes
+  /* kmp_int32 map_size; // Number of allocated TDG nodes */
  kmp_int32 num_roots; // Number of roots tasks int the TDG
   kmp_int32 *root_tasks; // Array of tasks identifiers that are roots
-  kmp_node_info_t *record_map; // Array of TDG nodes
+  kmp_node_vector_t *record_map; // Vector of TDG nodes
   kmp_tdg_status_t tdg_status =
       KMP_TDG_NONE; // Status of the TDG (recording, ready...)
   std::atomic<kmp_int32> num_tasks; // Number of TDG nodes
-  kmp_bootstrap_lock_t
-      graph_lock; // Protect graph attributes when updated via taskloop_recur
+  std::atomic<kmp_int32> tdg_task_id_next; // Task id of the next recorded node
   // Taskloop reduction related
   void *rec_taskred_data; // Data to pass to __kmpc_task_reduction_init or
                           // __kmpc_taskred_init
@@ -2804,6 +2811,7 @@ struct kmp_taskdata { /* aligned during dynamic allocation */
 #if OMPX_TASKGRAPH
   bool is_taskgraph = 0; // whether the task is within a TDG
   kmp_tdg_info_t *tdg; // used to associate task with a TDG
+  kmp_node_info_t *td_tdg_node_info; // node representing the task in the TDG
   kmp_int32 td_tdg_task_id; // local task id in its TDG
 #endif
   kmp_target_data_t td_target_data;
diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp
index abbca752f0587..2d1256431752a 100644
--- a/openmp/runtime/src/kmp_taskdeps.cpp
+++ b/openmp/runtime/src/kmp_taskdeps.cpp
@@ -232,8 +232,7 @@ static inline void __kmp_track_dependence(kmp_int32 gtid, kmp_depnode_t *source,
   }
   if (task_sink->is_taskgraph &&
       __kmp_tdg_is_recording(task_sink->tdg->tdg_status)) {
-    kmp_node_info_t *source_info =
-        &task_sink->tdg->record_map[task_source->td_tdg_task_id];
+    kmp_node_info_t *source_info = task_source->td_tdg_node_info;
     bool exists = false;
     for (int i = 0; i < source_info->nsuccessors; i++) {
       if (source_info->successors[i] == task_sink->td_tdg_task_id) {
@@ -244,21 +243,24 @@ static inline void __kmp_track_dependence(kmp_int32 gtid, kmp_depnode_t *source,
     if (!exists) {
       if (source_info->nsuccessors >= source_info->successors_size) {
         kmp_uint old_size = source_info->successors_size;
-        source_info->successors_size = 2 * source_info->successors_size;
+        source_info->successors_size = old_size == 0
+                                           ? __kmp_successors_size
+                                           : 2 * source_info->successors_size;
         kmp_int32 *old_succ_ids = source_info->successors;
         kmp_int32 *new_succ_ids = (kmp_int32 *)__kmp_allocate(
             source_info->successors_size * sizeof(kmp_int32));
-        KMP_MEMCPY(new_succ_ids, old_succ_ids, old_size * sizeof(kmp_int32));
+        if (old_succ_ids) {
+          KMP_MEMCPY(new_succ_ids, old_succ_ids, old_size * sizeof(kmp_int32));
+          __kmp_free(old_succ_ids);
+        }
         source_info->successors = new_succ_ids;
-        __kmp_free(old_succ_ids);
       }
 
       source_info->successors[source_info->nsuccessors] =
           task_sink->td_tdg_task_id;
       source_info->nsuccessors++;
 
-      kmp_node_info_t *sink_info =
-          &(task_sink->tdg->record_map[task_sink->td_tdg_task_id]);
+      kmp_node_info_t *sink_info = task_sink->td_tdg_node_info;
       sink_info->npredecessors++;
     }
   }
@@ -700,39 +702,15 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
       __kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
     kmp_tdg_info_t *tdg = new_taskdata->tdg;
     // extend record_map if needed
-    if (new_taskdata->td_tdg_task_id >= tdg->map_size) {
-      __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
-      if (new_taskdata->td_tdg_task_id >= tdg->map_size) {
-        kmp_uint old_size = tdg->map_size;
-        kmp_uint new_size = old_size * 2;
-        kmp_node_info_t *old_record = tdg->record_map;
-        kmp_node_info_t *new_record = (kmp_node_info_t *)__kmp_allocate(
-            new_size * sizeof(kmp_node_info_t));
-        KMP_MEMCPY(new_record, tdg->record_map,
-                   old_size * sizeof(kmp_node_info_t));
-        tdg->record_map = new_record;
-
-        __kmp_free(old_record);
-
-        for (kmp_int i = old_size; i < new_size; i++) {
-          kmp_int32 *successorsList = (kmp_int32 *)__kmp_allocate(
-              __kmp_successors_size * sizeof(kmp_int32));
-          new_record[i].task = nullptr;
-          new_record[i].successors = successorsList;
-          new_record[i].nsuccessors = 0;
-          new_record[i].npredecessors = 0;
-          new_record[i].successors_size = __kmp_successors_size;
-          KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
-        }
-        // update the size at the end, so that we avoid other
-        // threads use old_record while map_size is already updated
-        tdg->map_size = new_size;
-      }
-      __kmp_release_bootstrap_lock(&tdg->graph_lock);
+    kmp_node_info_t *node =
+        kmp_node_vector_get(tdg->record_map, new_taskdata->td_tdg_task_id);
+    if (node == nullptr) {
+      kmp_node_vector_resize(tdg->record_map, new_taskdata->td_tdg_task_id * 2);
+      node = kmp_node_vector_get(tdg->record_map, new_taskdata->td_tdg_task_id);
     }
-    tdg->record_map[new_taskdata->td_tdg_task_id].task = new_task;
-    tdg->record_map[new_taskdata->td_tdg_task_id].parent_task =
-        new_taskdata->td_parent;
+    node->task = new_task;
+    node->parent_task = new_taskdata->td_parent;
+    new_taskdata->td_tdg_node_info = node;
     KMP_ATOMIC_INC(&tdg->num_tasks);
   }
 #endif
diff --git a/openmp/runtime/src/kmp_taskdeps.h b/openmp/runtime/src/kmp_taskdeps.h
index f6bfb39218a21..b67317818a4cf 100644
--- a/openmp/runtime/src/kmp_taskdeps.h
+++ b/openmp/runtime/src/kmp_taskdeps.h
@@ -93,16 +93,26 @@ static inline void __kmp_dephash_free(kmp_info_t *thread, kmp_dephash_t *h) {
 }
 
 extern void __kmpc_give_task(kmp_task_t *ptask, kmp_int32 start);
+#if OMPX_TASKGRAPH
+extern kmp_node_vector_t *kmp_alloc_tdg_vector(kmp_int32 block_size);
+extern kmp_node_info_t *kmp_node_vector_get(kmp_node_vector_t *vector,
+                                            kmp_int32 id);
+extern void kmp_node_vector_resize(kmp_node_vector_t *vector, kmp_int32 size);
+extern void kmp_node_vector_free(kmp_node_vector_t *vector);
+#endif
 
 static inline void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task) {
 
 #if OMPX_TASKGRAPH
   if (task->is_taskgraph && !(__kmp_tdg_is_recording(task->tdg->tdg_status))) {
-    kmp_node_info_t *TaskInfo = &(task->tdg->record_map[task->td_tdg_task_id]);
+    kmp_node_info_t *TaskInfo = task->td_tdg_node_info;
     for (int i = 0; i < TaskInfo->nsuccessors; i++) {
       kmp_int32 successorNumber = TaskInfo->successors[i];
-      kmp_node_info_t *successor = &(task->tdg->record_map[successorNumber]);
+      kmp_node_info_t *successor =
+          kmp_node_vector_get(task->tdg->record_map, successorNumber);
+      /* kmp_node_info_t *successor = &(task->tdg->record_map[successorNumber]);
+       */
       kmp_int32 npredecessors =
           KMP_ATOMIC_DEC(&successor->npredecessors_counter) - 1;
       if (successor->task != nullptr && npredecessors == 0) {
         __kmp_omp_task(gtid, successor->task, false);
diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp
index 37836fb457537..307e69ba992b4 100644
--- a/openmp/runtime/src/kmp_tasking.cpp
+++ b/openmp/runtime/src/kmp_tasking.cpp
@@ -1394,6 +1394,7 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
   taskdata->td_flags.onced = 0;
   taskdata->is_taskgraph = 0;
   taskdata->tdg = nullptr;
+  taskdata->td_tdg_node_info = nullptr;
 #endif
   KMP_ATOMIC_ST_RLX(&taskdata->td_incomplete_child_tasks, 0);
   // start at one because counts current task and children
@@ -1437,7 +1438,7 @@ kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
     taskdata->is_taskgraph = 1;
     taskdata->tdg = __kmp_global_tdgs[__kmp_curr_tdg_idx];
     taskdata->td_task_id = KMP_GEN_TASK_ID();
-    taskdata->td_tdg_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
+    taskdata->td_tdg_task_id = KMP_ATOMIC_INC(&tdg->tdg_task_id_next);
   }
 #endif
   KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n",
@@ -1799,44 +1800,17 @@ kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task,
   if (new_taskdata->is_taskgraph &&
       __kmp_tdg_is_recording(new_taskdata->tdg->tdg_status)) {
     kmp_tdg_info_t *tdg = new_taskdata->tdg;
-    // extend the record_map if needed
-    if (new_taskdata->td_tdg_task_id >= new_taskdata->tdg->map_size) {
-      __kmp_acquire_bootstrap_lock(&tdg->graph_lock);
-      // map_size could have been updated by another thread if recursive
-      // taskloop
-      if (new_taskdata->td_tdg_task_id >= tdg->map_size) {
-        kmp_uint old_size = tdg->map_size;
-        kmp_uint new_size = old_size * 2;
-        kmp_node_info_t *old_record = tdg->record_map;
-        kmp_node_info_t *new_record = (kmp_node_info_t *)__kmp_allocate(
-            new_size * sizeof(kmp_node_info_t));
-
-        KMP_MEMCPY(new_record, old_record, old_size * sizeof(kmp_node_info_t));
-        tdg->record_map = new_record;
-
-        __kmp_free(old_record);
-
-        for (kmp_int i = old_size; i < new_size; i++) {
-          kmp_int32 *successorsList = (kmp_int32 *)__kmp_allocate(
-              __kmp_successors_size * sizeof(kmp_int32));
-          new_record[i].task = nullptr;
-          new_record[i].successors = successorsList;
-          new_record[i].nsuccessors = 0;
-          new_record[i].npredecessors = 0;
-          new_record[i].successors_size = __kmp_successors_size;
-          KMP_ATOMIC_ST_REL(&new_record[i].npredecessors_counter, 0);
-        }
-        // update the size at the end, so that we avoid other
-        // threads use old_record while map_size is already updated
-        tdg->map_size = new_size;
-      }
-      __kmp_release_bootstrap_lock(&tdg->graph_lock);
+
+    kmp_node_info_t *node =
+        kmp_node_vector_get(tdg->record_map, new_taskdata->td_tdg_task_id);
+    if (node == nullptr) {
+      kmp_node_vector_resize(tdg->record_map, new_taskdata->td_tdg_task_id * 2);
+      node = kmp_node_vector_get(tdg->record_map, new_taskdata->td_tdg_task_id);
     }
-    // record a task
-    if (tdg->record_map[new_taskdata->td_tdg_task_id].task == nullptr) {
-      tdg->record_map[new_taskdata->td_tdg_task_id].task = new_task;
-      tdg->record_map[new_taskdata->td_tdg_task_id].parent_task =
-          new_taskdata->td_parent;
+    if (node->task == nullptr) {
+      node->task = new_task;
+      node->parent_task = new_taskdata->td_parent;
+      new_taskdata->td_tdg_node_info = node;
       KMP_ATOMIC_INC(&tdg->num_tasks);
     }
   }
@@ -4334,6 +4308,95 @@ void __kmpc_give_task(kmp_task_t *ptask, kmp_int32 start = 0) {
   }
 }
 
+#if OMPX_TASKGRAPH
+kmp_node_vector_t *kmp_alloc_tdg_vector(kmp_int32 block_size) {
+  constexpr kmp_int32 block_nums = 1;
+  kmp_node_vector_t *vector =
+      (kmp_node_vector_t *)__kmp_allocate(sizeof(kmp_node_vector_t));
+  kmp_node_info_t **blocks = (kmp_node_info_t **)__kmp_allocate(
+      block_nums * sizeof(kmp_node_info_t *));
+  for (kmp_int32 i = 0; i < block_nums; i++) {
+    kmp_node_info_t *this_record_map =
+        (kmp_node_info_t *)__kmp_allocate(block_size * sizeof(kmp_node_info_t));
+    for (kmp_int32 j = 0; j < block_size; j++) {
+      this_record_map[j].task = nullptr;
+      this_record_map[j].parent_task = nullptr;
+      this_record_map[j].successors = nullptr;
+      this_record_map[j].nsuccessors = 0;
+      this_record_map[j].npredecessors = 0;
+      this_record_map[j].successors_size = 0;
+      KMP_ATOMIC_ST_RLX(&this_record_map[j].npredecessors_counter, 0);
+    }
+    blocks[i] = this_record_map;
+  }
+  vector->blocks = blocks;
+  vector->block_size = block_size;
+  vector->num_of_blocks = block_nums;
+  __kmp_init_bootstrap_lock(&vector->lock);
+  return vector;
+}
+
+void kmp_node_vector_resize(kmp_node_vector_t *vector, kmp_int32 size) {
+  kmp_int32 num_of_blocks = KMP_ATOMIC_LD_RLX(&vector->num_of_blocks);
+  kmp_int32 current_size = vector->block_size * num_of_blocks;
+  if (current_size >= size)
+    return;
+
+  __kmp_acquire_bootstrap_lock(&vector->lock);
+  // Re-check under the lock; 'blocks' may only be read or replaced while held
+  num_of_blocks = KMP_ATOMIC_LD_RLX(&vector->num_of_blocks);
+  current_size = vector->block_size * num_of_blocks;
+  if (current_size >= size) {
+    __kmp_release_bootstrap_lock(&vector->lock);
+    return;
+  }
+  const kmp_int32 new_block_nums = 1 + ((size - 1) / vector->block_size);
+  kmp_node_info_t **old_blocks = vector->blocks;
+  kmp_node_info_t **new_blocks = (kmp_node_info_t **)__kmp_allocate(
+      new_block_nums * sizeof(kmp_node_info_t *));
+  KMP_MEMCPY(new_blocks, old_blocks, num_of_blocks * sizeof(kmp_node_info_t *));
+  for (kmp_int i = num_of_blocks; i < new_block_nums; i++) {
+    kmp_node_info_t *this_record_map = (kmp_node_info_t *)__kmp_allocate(
+        vector->block_size * sizeof(kmp_node_info_t));
+    for (kmp_int32 j = 0; j < vector->block_size; j++) {
+      this_record_map[j].task = nullptr;
+      this_record_map[j].parent_task = nullptr;
+      this_record_map[j].successors = nullptr;
+      this_record_map[j].nsuccessors = 0;
+      this_record_map[j].npredecessors = 0;
+      this_record_map[j].successors_size = 0;
+      KMP_ATOMIC_ST_RLX(&this_record_map[j].npredecessors_counter, 0);
+    }
+    new_blocks[i] = this_record_map;
+  }
+  vector->blocks = new_blocks;
+  KMP_ATOMIC_ST_REL(&vector->num_of_blocks, new_block_nums);
+  __kmp_free(old_blocks); // only the block-pointer array is replaced
+  __kmp_release_bootstrap_lock(&vector->lock);
+}
+
+kmp_node_info_t *kmp_node_vector_get(kmp_node_vector_t *vector, kmp_int32 id) {
+  const kmp_int32 num_of_blocks = KMP_ATOMIC_LD_RLX(&vector->num_of_blocks);
+  const kmp_int32 vector_size = vector->block_size * num_of_blocks;
+  if (id >= vector_size)
+    return nullptr;
+  const kmp_int32 block_idx = id / vector->block_size;
+  const kmp_int32 node_idx = id % vector->block_size;
+  __kmp_acquire_bootstrap_lock(&vector->lock); // 'blocks' may be swapped
+  kmp_node_info_t *ret = &(vector->blocks[block_idx][node_idx]);
+  __kmp_release_bootstrap_lock(&vector->lock);
+  return ret;
+}
+
+void kmp_node_vector_free(kmp_node_vector_t *vector) {
+  __kmp_acquire_bootstrap_lock(&vector->lock);
+  for (int i = 0; i < vector->num_of_blocks; i++)
+    __kmp_free(vector->blocks[i]);
+  __kmp_free(vector->blocks);
+  __kmp_release_bootstrap_lock(&vector->lock);
+}
+#endif
+
 /*!
 @ingroup TASKING
 @param ptask Task which execution is completed
@@ -4465,7 +4528,8 @@ kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src
 #if OMPX_TASKGRAPH
   if (taskdata->is_taskgraph && !taskloop_recur &&
       __kmp_tdg_is_recording(taskdata_src->tdg->tdg_status))
-    taskdata->td_tdg_task_id = KMP_ATOMIC_INC(&__kmp_tdg_task_id);
+    taskdata->td_tdg_task_id =
+        KMP_ATOMIC_INC(&taskdata_src->tdg->tdg_task_id_next);
 #endif
   taskdata->td_task_id = KMP_GEN_TASK_ID();
   if (task->shareds != NULL) { // need setup shareds pointer
@@ -4979,10 +5043,6 @@ static void __kmp_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val,
 #endif
     __kmpc_taskgroup(loc, gtid);
   }
-
-#if OMPX_TASKGRAPH
-  KMP_ATOMIC_DEC(&__kmp_tdg_task_id);
-#endif
   // =========================================================================
   // calculate loop parameters
   kmp_taskloop_bounds_t task_bounds(task, lb, ub);
@@ -5263,6 +5323,9 @@ void __kmp_print_tdg_dot(kmp_tdg_info_t *tdg, kmp_int32 gtid) {
   kmp_safe_raii_file_t tdg_file(file_name, "w");
 
   kmp_int32 num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
+  /* kmp_int32 map_size = tdg->map_size; */
+  kmp_int32 map_size =
+      tdg->record_map->block_size * tdg->record_map->num_of_blocks;
   fprintf(tdg_file,
           "digraph TDG {\n"
           "   compound=true\n"
@@ -5273,9 +5336,14 @@ void __kmp_print_tdg_dot(kmp_tdg_info_t *tdg, kmp_int32 gtid) {
     fprintf(tdg_file, "  %d[style=bold]\n", i);
   }
   fprintf(tdg_file, "  }\n");
-  for (kmp_int32 i = 0; i < num_tasks; i++) {
-    kmp_int32 nsuccessors = tdg->record_map[i].nsuccessors;
-    kmp_int32 *successors = tdg->record_map[i].successors;
+  kmp_int32 tasks = 0;
+  for (kmp_int32 i = 0; tasks < num_tasks && i < map_size; i++) {
+    kmp_node_info_t *node = kmp_node_vector_get(tdg->record_map, i);
+    if (node->task == nullptr)
+      continue;
+    tasks++;
+    kmp_int32 nsuccessors = node->nsuccessors;
+    kmp_int32 *successors = node->successors;
     if (nsuccessors > 0) {
       for (kmp_int32 j = 0; j < nsuccessors; j++)
         fprintf(tdg_file, "  %d -> %d \n", i, successors[j]);
@@ -5293,10 +5361,13 @@ void __kmp_exec_tdg(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
   KMP_DEBUG_ASSERT(tdg->tdg_status == KMP_TDG_READY);
   KA_TRACE(10, ("__kmp_exec_tdg(enter): T#%d tdg_id=%d num_roots=%d\n", gtid,
                 tdg->tdg_id, tdg->num_roots));
-  kmp_node_info_t *this_record_map = tdg->record_map;
+  /* kmp_node_info_t *this_record_map = tdg->record_map; */
   kmp_int32 *this_root_tasks = tdg->root_tasks;
   kmp_int32 this_num_roots = tdg->num_roots;
   kmp_int32 this_num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
+  kmp_int32 map_size =
+      tdg->record_map->block_size * tdg->record_map->num_of_blocks;
+  kmp_int32 tasks = 0;
 
   kmp_info_t *thread = __kmp_threads[gtid];
   kmp_taskdata_t *parent_task = thread->th.th_current_task;
@@ -5305,18 +5376,20 @@ void __kmp_exec_tdg(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
 
     __kmpc_taskred_init(gtid, tdg->rec_num_taskred, tdg->rec_taskred_data);
   }
 
-  for (kmp_int32 j = 0; j < this_num_tasks; j++) {
-    kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(this_record_map[j].task);
+  for (kmp_int32 j = 0; j < map_size && tasks < this_num_tasks; j++) {
+    kmp_node_info_t *node = kmp_node_vector_get(tdg->record_map, j);
+    if (node->task == nullptr)
+      continue;
+    tasks++;
+    kmp_taskdata_t *td = KMP_TASK_TO_TASKDATA(node->task);
     td->td_parent = parent_task;
-    this_record_map[j].parent_task = parent_task;
+    node->parent_task = parent_task;
 
-    kmp_taskgroup_t *parent_taskgroup =
-        this_record_map[j].parent_task->td_taskgroup;
+    kmp_taskgroup_t *parent_taskgroup = node->parent_task->td_taskgroup;
 
-    KMP_ATOMIC_ST_RLX(&this_record_map[j].npredecessors_counter,
-                      this_record_map[j].npredecessors);
-    KMP_ATOMIC_INC(&this_record_map[j].parent_task->td_incomplete_child_tasks);
+    KMP_ATOMIC_ST_RLX(&node->npredecessors_counter, node->npredecessors);
+    KMP_ATOMIC_INC(&node->parent_task->td_incomplete_child_tasks);
 
     if (parent_taskgroup) {
       KMP_ATOMIC_INC(&parent_taskgroup->count);
@@ -5326,12 +5399,14 @@ void __kmp_exec_tdg(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
       // If the parent doesnt have a taskgroup, remove it from the task
       td->td_taskgroup = nullptr;
     }
-    if (this_record_map[j].parent_task->td_flags.tasktype == TASK_EXPLICIT)
-      KMP_ATOMIC_INC(&this_record_map[j].parent_task->td_allocated_child_tasks);
+    if (node->parent_task->td_flags.tasktype == TASK_EXPLICIT)
+      KMP_ATOMIC_INC(&node->parent_task->td_allocated_child_tasks);
   }
 
   for (kmp_int32 j = 0; j < this_num_roots; ++j) {
-    __kmp_omp_task(gtid, this_record_map[this_root_tasks[j]].task, true);
+    kmp_node_info_t *node =
+        kmp_node_vector_get(tdg->record_map, this_root_tasks[j]);
+    __kmp_omp_task(gtid, node->task, true);
   }
   KA_TRACE(10, ("__kmp_exec_tdg(exit): T#%d tdg_id=%d num_roots=%d\n", gtid,
                 tdg->tdg_id, tdg->num_roots));
@@ -5350,29 +5425,14 @@ static inline void __kmp_start_record(kmp_int32 gtid,
   __kmp_global_tdgs[__kmp_curr_tdg_idx] = tdg;
   // Initializing the TDG structure
   tdg->tdg_id = tdg_id;
-  tdg->map_size = INIT_MAPSIZE;
+  /* tdg->map_size = INIT_MAPSIZE; */
   tdg->num_roots = -1;
   tdg->root_tasks = nullptr;
   tdg->tdg_status = KMP_TDG_RECORDING;
   tdg->rec_num_taskred = 0;
   tdg->rec_taskred_data = nullptr;
   KMP_ATOMIC_ST_RLX(&tdg->num_tasks, 0);
-
-  // Initializing the list of nodes in this TDG
-  kmp_node_info_t *this_record_map =
-      (kmp_node_info_t *)__kmp_allocate(INIT_MAPSIZE * sizeof(kmp_node_info_t));
-  for (kmp_int32 i = 0; i < INIT_MAPSIZE; i++) {
-    kmp_int32 *successorsList =
-        (kmp_int32 *)__kmp_allocate(__kmp_successors_size * sizeof(kmp_int32));
-    this_record_map[i].task = nullptr;
-    this_record_map[i].successors = successorsList;
-    this_record_map[i].nsuccessors = 0;
-    this_record_map[i].npredecessors = 0;
-    this_record_map[i].successors_size = __kmp_successors_size;
-    KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter, 0);
-  }
-
-  __kmp_global_tdgs[__kmp_curr_tdg_idx]->record_map = this_record_map;
+  tdg->record_map = kmp_alloc_tdg_vector(INIT_MAPSIZE);
 }
 
 // __kmpc_start_record_task: Wrapper around __kmp_start_record to mark
@@ -5422,22 +5482,28 @@ kmp_int32 __kmpc_start_record_task(ident_t *loc_ref, kmp_int32 gtid,
 // tdg: Pointer to the TDG
 void __kmp_end_record(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
   // Store roots
-  kmp_node_info_t *this_record_map = tdg->record_map;
   kmp_int32 this_num_tasks = KMP_ATOMIC_LD_RLX(&tdg->num_tasks);
   kmp_int32 *this_root_tasks =
       (kmp_int32 *)__kmp_allocate(this_num_tasks * sizeof(kmp_int32));
-  kmp_int32 this_map_size = tdg->map_size;
+  kmp_int32 this_map_size =
+      tdg->record_map->block_size * tdg->record_map->num_of_blocks;
   kmp_int32 this_num_roots = 0;
   kmp_info_t *thread = __kmp_threads[gtid];
+  kmp_int32 tasks = 0;
 
-  for (kmp_int32 i = 0; i < this_num_tasks; i++) {
-    if (this_record_map[i].npredecessors == 0) {
+  for (kmp_int32 i = 0; tasks < this_num_tasks && i < this_map_size; i++) {
+    kmp_node_info_t *node = kmp_node_vector_get(tdg->record_map, i);
+    if (node->task == nullptr) {
+      continue;
+    }
+    tasks++;
+    if (node->npredecessors == 0) {
       this_root_tasks[this_num_roots++] = i;
     }
   }
 
   // Update with roots info and mapsize
-  tdg->map_size = this_map_size;
+  /* tdg->map_size = this_map_size; */
   tdg->num_roots = this_num_roots;
   tdg->root_tasks = this_root_tasks;
   KMP_DEBUG_ASSERT(tdg->tdg_status == KMP_TDG_RECORDING);
@@ -5449,11 +5515,15 @@ void __kmp_end_record(kmp_int32 gtid, kmp_tdg_info_t *tdg) {
   }
 
   // Reset predecessor counter
-  for (kmp_int32 i = 0; i < this_num_tasks; i++) {
-    KMP_ATOMIC_ST_RLX(&this_record_map[i].npredecessors_counter,
-                      this_record_map[i].npredecessors);
+  tasks = 0;
+  for (kmp_int32 i = 0; tasks < this_num_tasks && i < this_map_size; i++) {
+    kmp_node_info_t *node = kmp_node_vector_get(tdg->record_map, i);
+    if (node->task == nullptr) {
+      continue;
+    }
+    tasks++;
+    KMP_ATOMIC_ST_RLX(&node->npredecessors_counter, node->npredecessors);
   }
-  KMP_ATOMIC_ST_RLX(&__kmp_tdg_task_id, 0);
 
   if (__kmp_tdg_dot)
     __kmp_print_tdg_dot(tdg, gtid);
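
Note on the data structure (illustrative, not part of the patch): the old record_map was a single array that was reallocated and copied on growth, which invalidated any pointer into it and forced every reader to re-index through record_map[task_id]. The kmp_node_vector added above grows by appending fixed-size blocks instead, so a kmp_node_info_t address obtained once (and cached in td_tdg_node_info) stays valid for the lifetime of the TDG. Below is a minimal standalone C++ sketch of that scheme; NodeInfo, NodeVector, alloc_vector, vector_get and vector_resize are hypothetical stand-ins for the patch's kmp_* names, and std::mutex plus new/delete stand in for the runtime's bootstrap lock and __kmp_allocate/__kmp_free:

#include <atomic>
#include <cstdio>
#include <mutex>

struct NodeInfo {
  void *task = nullptr; // stand-in for the recorded task payload
};

struct NodeVector {
  NodeInfo **blocks;              // array of pointers to fixed-size blocks
  int block_size;                 // nodes per block
  std::atomic<int> num_of_blocks; // published block count
  std::mutex lock;                // guards replacement of 'blocks'
};

NodeVector *alloc_vector(int block_size) {
  NodeVector *v = new NodeVector;
  v->block_size = block_size;
  v->blocks = new NodeInfo *[1];
  v->blocks[0] = new NodeInfo[block_size];
  v->num_of_blocks.store(1);
  return v;
}

// Returns nullptr when id is out of range; callers resize and retry.
NodeInfo *vector_get(NodeVector *v, int id) {
  if (id >= v->block_size * v->num_of_blocks.load())
    return nullptr;
  std::lock_guard<std::mutex> g(v->lock); // 'blocks' may be swapped
  return &v->blocks[id / v->block_size][id % v->block_size];
}

void vector_resize(NodeVector *v, int size) {
  std::lock_guard<std::mutex> g(v->lock);
  int nblocks = v->num_of_blocks.load();
  if (v->block_size * nblocks >= size)
    return; // another thread already grew the vector
  int new_nblocks = 1 + (size - 1) / v->block_size;
  NodeInfo **nb = new NodeInfo *[new_nblocks];
  for (int i = 0; i < nblocks; i++)
    nb[i] = v->blocks[i]; // existing blocks are reused; nodes never move
  for (int i = nblocks; i < new_nblocks; i++)
    nb[i] = new NodeInfo[v->block_size];
  delete[] v->blocks; // only the block-pointer array is replaced
  v->blocks = nb;
  v->num_of_blocks.store(new_nblocks);
}

int main() {
  NodeVector *v = alloc_vector(4); // one block of 4 nodes
  NodeInfo *node = vector_get(v, 9);
  if (node == nullptr) { // same get/resize/get pattern as the task paths
    vector_resize(v, 9 * 2);
    node = vector_get(v, 9);
  }
  std::printf("node address stable after growth: %s\n",
              node == vector_get(v, 9) ? "yes" : "no");
  return 0;
}

The get/resize/get sequence in main() mirrors the recording paths in __kmpc_omp_task_with_deps and __kmp_omp_task; because blocks never move, resize only swaps the small array of block pointers under the lock while previously returned node pointers remain usable.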