Skip to content

Commit e97eebb

Browse files
committed
Initial fix for #260.
1 parent 326996b commit e97eebb

File tree

3 files changed

+68
-45
lines changed

3 files changed

+68
-45
lines changed

CMakeLists.txt

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,9 @@ set(tests_root ${CMAKE_CURRENT_BINARY_DIR}/src/tests)
465465

466466

467467
if(opencoarrays_aware_compiler)
468+
if (RUN_DEVELOPER_TESTS)
469+
message ( STATUS "Running Developer tests is enabled." )
470+
endif()
468471
# Unit tests targeting each libcaf_mpi function, argument, and branch of code
469472
add_mpi_test(initialize_mpi 2 ${tests_root}/unit/init_register/initialize_mpi)
470473
add_mpi_test(register 2 ${tests_root}/unit/init_register/register)
@@ -476,13 +479,7 @@ if(opencoarrays_aware_compiler)
476479
add_mpi_test(register_alloc_comp_1 2 ${tests_root}/unit/init_register/register_alloc_comp_1)
477480
add_mpi_test(register_alloc_comp_2 2 ${tests_root}/unit/init_register/register_alloc_comp_2)
478481
add_mpi_test(register_alloc_comp_3 2 ${tests_root}/unit/init_register/register_alloc_comp_3)
479-
if (RUN_DEVELOPER_TESTS)
480-
message ( STATUS "Running Developer tests is enabled." )
481-
add_mpi_test(async_comp_alloc 6 ${tests_root}/unit/init_register/async_comp_alloc)
482-
# Timeout async_comp_alloc test after 3 seconds to progess past the known failure
483-
set_property(TEST async_comp_alloc PROPERTY TIMEOUT_AFTER_MATCH 3 "known failure")
484-
set_property(TEST async_comp_alloc PROPERTY TIMEOUT 6) # in the event old CMake is being used
485-
endif()
482+
add_mpi_test(async_comp_alloc 6 ${tests_root}/unit/init_register/async_comp_alloc)
486483
endif()
487484
add_mpi_test(get_array 2 ${tests_root}/unit/send-get/get_array)
488485
add_mpi_test(get_self 2 ${tests_root}/unit/send-get/get_self)

src/mpi/mpi_caf.c

Lines changed: 62 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,15 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
4949

5050

5151
#ifdef GCC_GE_7
52+
enum flags_t {
53+
/** Set when this token has memory associated to it. */
54+
FLAG_ASSOCIATED = 1,
55+
/** Set when the memory associated to this token is not owned by the
56+
mpi-library, i.e., the mpi-library does not free the memory. This happens for
57+
asynchronously allocated memory, which is attached using MPI_Win_attach. */
58+
FLAG_MEM_NOT_OWNED = 2,
59+
};
60+
5261
/** The caf-token of the mpi-library.
5362
5463
Objects of this data structure are owned by the library and are treated as a
@@ -76,11 +85,12 @@ typedef struct mpi_caf_token_t
7685
window gives access to the descriptor on remote images. When the object is
7786
scalar, then this is NULL. */
7887
MPI_Win *desc;
79-
/** This window allows access the local_memptr member of the associated token.
80-
With async allocation the token may be registered, but the memory not yet. To
81-
be able to check (using is_present()), that the memory on a remote image is
82-
present, this win can be used. */
83-
MPI_Win local_memptr_win;
88+
/** Flags specifying the state of this token. For a description of available
89+
flags see the flags_t. */
90+
unsigned *flags;
91+
/** This window allows access the local flags member of the associated
92+
token. */
93+
MPI_Win flags_win;
8494
} mpi_caf_token_t;
8595
#define TOKEN(X) &(((mpi_caf_token_t *) (X))->memptr)
8696
#else
@@ -498,10 +508,17 @@ PREFIX (finalize) (void)
498508
MPI_Win_free (mpi_token->desc);
499509
free (mpi_token->desc);
500510
}
501-
CAF_Win_unlock_all (mpi_token->local_memptr_win);
502-
MPI_Win_free (&mpi_token->local_memptr_win);
503-
#endif // GCC_GE_7
504511
MPI_Win_free(p);
512+
/* Free the memory only, when it was allocated by the caf-library. */
513+
if ((*(mpi_token->flags) & FLAG_MEM_NOT_OWNED) > 0)
514+
free (mpi_token->local_memptr);
515+
/* Free the flags window only after accessing. */
516+
CAF_Win_unlock_all (mpi_token->flags_win);
517+
MPI_Win_free (&mpi_token->flags_win);
518+
free (tmp_tot->token);
519+
#else // GCC_GE_7
520+
MPI_Win_free(p);
521+
#endif // GCC_GE_7
505522
free(tmp_tot);
506523
tmp_tot = prev;
507524
}
@@ -583,7 +600,14 @@ PREFIX (register) (size_t size, caf_register_t type, caf_token_t *token,
583600
/* The token has to be present, when COARRAY_ALLOC_ALLOCATE_ONLY is
584601
specified. */
585602
if (type != CAF_REGTYPE_COARRAY_ALLOC_ALLOCATE_ONLY)
586-
*token = malloc (sizeof (mpi_caf_token_t));
603+
{
604+
*token = malloc (sizeof (mpi_caf_token_t));
605+
MPI_Win_allocate (sizeof(unsigned), 1, mpi_info_same_size, CAF_COMM_WORLD,
606+
&((mpi_caf_token_t *)*token)->flags,
607+
&((mpi_caf_token_t *)*token)->flags_win);
608+
CAF_Win_lock_all (((mpi_caf_token_t *)*token)->flags_win);
609+
*((mpi_caf_token_t *)*token)->flags = 0;
610+
}
587611

588612
mpi_token = (mpi_caf_token_t *) *token;
589613
p = TOKEN(mpi_token);
@@ -617,28 +641,18 @@ PREFIX (register) (size_t size, caf_register_t type, caf_token_t *token,
617641
{
618642
int ierr;
619643
mem = malloc (actual_size);
644+
CAF_Win_unlock_all (*p);
620645
ierr = MPI_Win_attach (*p, mem, actual_size);
621-
fprintf(stderr, "%d/%d: Attach mem %p to win = %p, ierr: %d\n", caf_this_image, caf_num_images,
622-
mem, *p, ierr);
646+
CAF_Win_lock_all (*p);
647+
*(mpi_token->flags) |= (FLAG_ASSOCIATED | FLAG_MEM_NOT_OWNED);
648+
fprintf(stderr, "%d/%d: Attach mem %p to win = %p, ierr: %d, flags: %u\n",
649+
caf_this_image, caf_num_images, mem, *p, ierr, *mpi_token->flags);
623650
}
624651
else
625652
{
626653
MPI_Win_allocate (actual_size, 1, MPI_INFO_NULL, CAF_COMM_WORLD, &mem, p);
627654
CAF_Win_lock_all (*p);
628-
}
629-
630-
/* When doing a allocate only, the token is initialized already, and the
631-
* window for the local_memptr exists already. Any time else create a window
632-
* to monitor whether the data-pointer of this token is associated or not. */
633-
if (type != CAF_REGTYPE_COARRAY_ALLOC_ALLOCATE_ONLY)
634-
{
635-
int ierr;
636-
ierr = MPI_Win_create (&mpi_token->local_memptr, sizeof (void *), 1,
637-
mpi_info_same_size, CAF_COMM_WORLD,
638-
&mpi_token->local_memptr_win);
639-
fprintf(stderr, "%d/%d: Creating win return error code: %d\n", caf_this_image,
640-
caf_num_images, ierr);
641-
CAF_Win_lock_all (mpi_token->local_memptr_win);
655+
*(mpi_token->flags) = FLAG_ASSOCIATED;
642656
}
643657
#else // MPI_VERSION
644658
MPI_Alloc_mem(actual_size, MPI_INFO_NULL, &mem);
@@ -872,10 +886,20 @@ PREFIX (deregister) (caf_token_t *token, int *stat, char *errmsg, int errmsg_len
872886
/* Unlock only, when removing the window. */
873887
CAF_Win_unlock_all(*p);
874888
MPI_Win_free (p);
889+
if ((*(mpi_token->flags) & FLAG_MEM_NOT_OWNED) > 0)
890+
free (mpi_token->local_memptr);
875891
}
876892
else
877-
MPI_Win_detach (*p, mpi_token->local_memptr);
893+
{
894+
MPI_Win_detach (*p, mpi_token->local_memptr);
895+
/* Free the memory, when we have allocated it. */
896+
if ((*(mpi_token->flags) & FLAG_MEM_NOT_OWNED) > 0)
897+
free (mpi_token->local_memptr);
898+
*(mpi_token->flags) = (*(mpi_token->flags) & ~FLAG_MEM_NOT_OWNED);
899+
}
878900
mpi_token->local_memptr = NULL;
901+
/* Clear the flag, that memory is associated. */
902+
*(mpi_token->flags) = (*(mpi_token->flags) & ~FLAG_ASSOCIATED);
879903
}
880904
if ((*(mpi_caf_token_t **)token)->desc
881905
&& type != CAF_DEREGTYPE_COARRAY_DEALLOCATE_ONLY)
@@ -886,8 +910,8 @@ PREFIX (deregister) (caf_token_t *token, int *stat, char *errmsg, int errmsg_len
886910
}
887911
if (type != CAF_DEREGTYPE_COARRAY_DEALLOCATE_ONLY)
888912
{
889-
CAF_Win_unlock_all (mpi_token->local_memptr_win);
890-
MPI_Win_free (&mpi_token->local_memptr_win);
913+
CAF_Win_unlock_all (mpi_token->flags_win);
914+
MPI_Win_free (&mpi_token->flags_win);
891915
}
892916
#else
893917
CAF_Win_unlock_all(*p);
@@ -934,6 +958,7 @@ PREFIX (sync_all) (int *stat, char *errmsg, int errmsg_len)
934958
{
935959
int ierr=0;
936960

961+
fprintf (stderr, "%d/%d: Entering sync all.\n", caf_this_image, caf_num_images);
937962
if (unlikely (caf_is_finalized))
938963
ierr = STAT_STOPPED_IMAGE;
939964
else
@@ -967,6 +992,7 @@ PREFIX (sync_all) (int *stat, char *errmsg, int errmsg_len)
967992
else
968993
caf_runtime_error (msg);
969994
}
995+
fprintf (stderr, "%d/%d: Leaving sync all.\n", caf_this_image, caf_num_images);
970996
}
971997

972998
/* token: The token of the array to be written to. */
@@ -3108,14 +3134,14 @@ PREFIX(is_present) (caf_token_t token, int image_index, caf_reference_t *refs)
31083134
riter = riter->next;
31093135
}
31103136

3111-
void *remote_local_memory = NULL;
3112-
MPI_Datatype dtype = sizeof (void *) == 8 ? MPI_INTEGER8: MPI_INTEGER4;
3113-
int ierr = MPI_Get (&remote_local_memory, 1, dtype, image_index - 1, 0, 1, dtype,
3114-
mpi_token->local_memptr_win);
3115-
fprintf(stderr, "%d/%d: Got remote_local_memory[%d] for win %p to be: %p, ierr = %d\n",
3116-
caf_this_image, caf_num_images, image_index, mpi_token->memptr,
3117-
remote_local_memory, ierr);
3118-
return remote_local_memory != NULL;
3137+
unsigned remote_flags = 0U;
3138+
int ierr = MPI_Get (&remote_flags, sizeof (unsigned), MPI_BYTE,
3139+
image_index - 1, 0, sizeof (unsigned), MPI_BYTE,
3140+
mpi_token->flags_win);
3141+
fprintf(stderr, "%d/%d: Got remote_flags[%d] for win %p to be: %u, ierr = %d\n",
3142+
caf_this_image, caf_num_images, image_index, mpi_token->flags_win,
3143+
remote_flags, ierr);
3144+
return (remote_flags & FLAG_ASSOCIATED) > 0;
31193145
}
31203146
#endif
31213147

src/tests/unit/init_register/async_comp_alloc.f90

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,6 @@ program async_comp_alloc
6060
call assert(allocation_status == success, "allocated(obj)")
6161
call assert(.not. allocated(obj%i) , ".not. allocated(obj%i)")
6262

63-
if (me==1) print *,"known failure -- ctest will timeout (see https://github.com/sourceryinstitute/opencoarrays/issues/260)."
64-
6563
block
6664
integer :: allocating_image, test_image
6765
character(len=20) :: image_number
@@ -98,8 +96,10 @@ program async_comp_alloc
9896
call assert( obj%i == me , "obj%i == this_image()")
9997
sync all
10098
end if
99+
sync all
101100
end do loop_over_all_image_numbers
102101
end block
102+
if (me == 1) print *, "Test passed."
103103
end associate
104104
contains
105105
subroutine assert(assertion,description,stat)

0 commit comments

Comments
 (0)