Skip to content

Commit 0368d69

Browse files
committed
Fix CHANGE TEAM and END TEAM deadlocks
CHANGE TEAM deadlocks when called by a subset of images in the current team. and there is an extra END TEAM MPI_Barrier() that doesn't have a matching call in images that don't call CHANGE TEAM / END TEAM. Fixes #610
1 parent 003efcb commit 0368d69

File tree

4 files changed

+39
-3
lines changed

4 files changed

+39
-3
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -718,6 +718,7 @@ if(opencoarrays_aware_compiler)
718718
add_caf_test(alloc_comp_get_convert_nums 2 alloc_comp_get_convert_nums)
719719
if(NOT CMAKE_Fortran_COMPILER_VERSION VERSION_LESS 8)
720720
add_caf_test(team_number 2 team_number)
721+
add_caf_test(teams_subset 3 teams_subset)
721722
add_caf_test(get_communicator 3 get_communicator)
722723
add_caf_test(alloc_comp_multidim_shape 2 alloc_comp_multidim_shape)
723724
endif()

src/mpi/mpi_caf.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8098,7 +8098,6 @@ void PREFIX(change_team) (caf_team_t *team,
80988098
caf_teams_list * tmp_list = NULL;
80998099
void *tmp_team;
81008100
MPI_Comm *tmp_comm;
8101-
int ierr = MPI_Barrier(CAF_COMM_WORLD); chk_err(ierr);
81028101

81038102
tmp_list = (struct caf_teams_list *)*team;
81048103
tmp_team = (void *)tmp_list->team;
@@ -8131,9 +8130,10 @@ void PREFIX(change_team) (caf_team_t *team,
81318130
tmp_team = tmp_used->team_list_elem->team;
81328131
tmp_comm = (MPI_Comm *)tmp_team;
81338132
CAF_COMM_WORLD = *tmp_comm;
8134-
ierr = MPI_Comm_rank(*tmp_comm,&caf_this_image); chk_err(ierr);
8133+
int ierr = MPI_Comm_rank(*tmp_comm,&caf_this_image); chk_err(ierr);
81358134
caf_this_image++;
81368135
ierr = MPI_Comm_size(*tmp_comm,&caf_num_images); chk_err(ierr);
8136+
ierr = MPI_Barrier(*tmp_comm); chk_err(ierr);
81378137
}
81388138

81398139
MPI_Fint
@@ -8178,7 +8178,6 @@ void PREFIX(end_team) (caf_team_t *team __attribute__((unused)))
81788178
tmp_team = tmp_used->team_list_elem->team;
81798179
tmp_comm = (MPI_Comm *)tmp_team;
81808180
CAF_COMM_WORLD = *tmp_comm;
8181-
ierr = MPI_Barrier(CAF_COMM_WORLD); chk_err(ierr);
81828181
/* CAF_COMM_WORLD = (MPI_Comm)*tmp_used->team_list_elem->team; */
81838182
ierr = MPI_Comm_rank(CAF_COMM_WORLD,&caf_this_image); chk_err(ierr);
81848183
caf_this_image++;

src/tests/unit/teams/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
caf_compile_executable(team_number team-number.f90)
2+
caf_compile_executable(teams_subset teams_subset.f90)
23
caf_compile_executable(get_communicator get-communicator.f90)

src/tests/unit/teams/teams_subset.f90

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
program teams_subset
2+
use iso_fortran_env, only : team_type
3+
implicit none
4+
5+
type(team_type) :: team
6+
integer :: initial_team_image, max_image(2), min_image(2), myteam
7+
8+
initial_team_image = this_image()
9+
10+
if (initial_team_image == 1 .or. initial_team_image == num_images()) then
11+
myteam = 1
12+
else
13+
myteam = 2
14+
end if
15+
16+
form team (myteam, team)
17+
18+
if (myteam == 1) then
19+
change team(team)
20+
max_image = [initial_team_image, this_image()]
21+
call co_max(max_image)
22+
min_image = [initial_team_image, this_image()]
23+
call co_min(min_image)
24+
end team
25+
if (any(min_image /= [1, 1]) .or. any(max_image /= [num_images(), 2])) then
26+
write(*,*) "Test failed."
27+
error stop
28+
end if
29+
end if
30+
31+
sync all
32+
33+
if (initial_team_image == 1) write(*,*) "Test passed."
34+
35+
end program

0 commit comments

Comments
 (0)