Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions content/shmem_sync_nb.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
\apisummary{
Registers the arrival of a \ac{PE} at a synchronization point.
This routine initiates a nonblocking synchronization operation for a
given \openshmem team and returns immediately without necessarily
completing the operation.
}

\begin{apidefinition}

\begin{C11synopsis}
int @\FuncDecl{shmem\_sync\_nb}@(shmem_team_t team, shmem_req_h *request);
\end{C11synopsis}

\begin{Csynopsis}
int @\FuncDecl{shmem\_team\_sync\_nb}@(shmem_team_t team, shmem_req_h *request);
\end{Csynopsis}

\begin{apiarguments}

\apiargument{IN}{team}{A valid \openshmem team handle.}%
\apiargument{OUT}{request}{An opaque request handle identifying the synchronization
operation.}

\end{apiarguments}

\apidescription{
\FUNC{shmem\_sync\_nb} is a collective nonblocking synchronization routine
over an existing \openshmem team.

{\bf Invocation and completion}: A call to the nonblocking sync routine
initiates the operation and returns immediately without necessarily
completing the operation. On success, an opaque request handle is created
and returned. The completion of the operation can be observed after one or
more calls to \FUNC{shmem\_req\_test} or a call to \FUNC{shmem\_req\_wait}.
When the completion of the operation is observed, the request handle is
deallocated and cannot be reused.

{\bf Synchronization semantics}:
The routine registers the arrival of a \ac{PE} at a synchronization point
in the program. This is a fast mechanism for synchronizing all \acp{PE}
that participate in this collective call. The routine ensures that all
\acp{PE} in the specified team have called \FUNC{shmem\_sync\_nb} when
the completion of the operation is observed.

All \acp{PE} in the provided team must participate
in the sync operation. If \VAR{team} compares equal to
\LibConstRef{SHMEM\_TEAM\_INVALID} or is otherwise invalid, the behavior
is undefined.

Upon completion of the operation, the following is true for the local \ac{PE}:
\begin{itemize}
\item All \acp{PE} in the team have called \FUNC{shmem\_sync\_nb}.
\item Similar to the blocking \FUNC{shmem\_sync} routine, \FUNC{shmem\_sync\_nb}
only ensures the completion and visibility of previously issued memory stores and
does not ensure the completion of remote memory updates issued via \openshmem routines.
\end{itemize}
}

\apireturnvalues{
Zero if the operation was successfully initiated. Nonzero otherwise.
}

\apinotes{
The \FUNC{shmem\_sync\_nb} routine can be used to portably ensure that
memory access operations observe remote updates in the order enforced by the
initiator \acp{PE}, provided that the initiator \ac{PE} ensures completion of remote
updates with a call to \FUNC{shmem\_quiet} prior to the call to the
\FUNC{shmem\_sync\_nb} routine.

Team handle error checking and integer return codes are currently undefined.
Implementations may define these behaviors as needed, but programs should
ensure portability by doing their own checks for invalid team handles and for
\LibConstRef{SHMEM\_TEAM\_INVALID}.
}

\begin{apiexamples}

\apicexample
{The following \Cstd[11] example is analogous to the \FUNC{shmem\_sync}
example, but uses \FUNC{shmem\_sync\_nb} to overlap synchronization on two
teams.}
{./example_code/shmem_sync_nb_example.c}
{}

\end{apiexamples}

\end{apidefinition}
46 changes: 46 additions & 0 deletions example_code/shmem_sync_nb_example.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#include <shmem.h>

/*
 * Example: overlapping two nonblocking team synchronizations.
 *
 * A team of even-numbered PEs (starting at PE 2) is split from the world
 * team. Each member of that team writes the value 2 into `x` on the next
 * member (circularly), then a nonblocking sync is initiated on both the
 * "twos" team and the world team before either request is waited on.
 *
 * Exit status (via shmem_global_exit):
 *   1 - a PE outside the twos team saw `x` modified
 *   2 - a PE inside the twos team did not observe the value 2
 *   3 - a nonblocking sync could not be initiated
 */
int main(void) {
    static int x = 10101; /* static => symmetric, so it is remotely accessible */

    shmem_team_t twos_team = SHMEM_TEAM_INVALID;
    shmem_team_config_t *config = NULL;
    shmem_req_h req_twos = SHMEM_REQ_INVALID;
    shmem_req_h req_world = SHMEM_REQ_INVALID;

    shmem_init();
    int mype = shmem_my_pe();
    int npes = shmem_n_pes();

    /* With fewer than three PEs there is no nonzero even PE to form the team. */
    if (npes > 2) {
        shmem_team_split_strided(SHMEM_TEAM_WORLD, 2, 2, (npes - 1) / 2, config, 0, &twos_team);
    }

    if (twos_team != SHMEM_TEAM_INVALID) {
        int mype_twos = shmem_team_my_pe(twos_team);
        int npes_twos = shmem_team_n_pes(twos_team);
        /* Put the value 2 to the next team member in a circular fashion. */
        shmem_p(&x, 2,
                shmem_team_translate_pe(twos_team, (mype_twos + 1) % npes_twos, SHMEM_TEAM_WORLD));
        /* Complete the put before arriving at the sync so peers can rely on it. */
        shmem_quiet();
    }

    /* Overlap: initiate world sync while twos_team sync proceeds */
    if (twos_team != SHMEM_TEAM_INVALID) {
        /* A nonzero return means no request was created; do not wait on it. */
        if (shmem_sync_nb(twos_team, &req_twos) != 0) {
            shmem_global_exit(3);
        }
    }
    if (shmem_sync_nb(SHMEM_TEAM_WORLD, &req_world) != 0) {
        shmem_global_exit(3);
    }

    /* Only PEs that actually started a twos_team sync hold a request for it. */
    if (req_twos != SHMEM_REQ_INVALID) {
        shmem_req_wait(&req_twos);
    }
    shmem_req_wait(&req_world);

    if (mype && mype % 2 == 0) {
        /* Nonzero even PEs are twos_team members and must have received 2. */
        if (x != 2) {
            shmem_global_exit(2);
        }
    } else if (x != 10101) {
        /* All other PEs were never targeted, so x must be untouched. */
        shmem_global_exit(1);
    }

    shmem_finalize();
    return 0;
}
Loading