Skip to content

Commit 9825157

Browse files
committed
oshmem: Align OSHMEM API with spec v1.3 (Add man for alltoall)
1 parent 3e1e131 commit 9825157

File tree

5 files changed

+233
-0
lines changed

5 files changed

+233
-0
lines changed

oshmem/shmem/man/man3/Makefile.extra

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,10 @@ shmem_api_man_pages = \
132132
shmem/man/man3/shmem_quiet.3 \
133133
shmem/man/man3/shmem_broadcast32.3 \
134134
shmem/man/man3/shmem_broadcast64.3 \
135+
shmem/man/man3/shmem_alltoall32.3 \
136+
shmem/man/man3/shmem_alltoall64.3 \
137+
shmem/man/man3/shmem_alltoalls32.3 \
138+
shmem/man/man3/shmem_alltoalls64.3 \
135139
shmem/man/man3/shmem_collect32.3 \
136140
shmem/man/man3/shmem_collect64.3 \
137141
shmem/man/man3/shmem_fcollect32.3 \
Lines changed: 226 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,226 @@
1+
.\" -*- nroff -*-
2+
.\" Copyright (c) 2016 Mellanox Technologies, Inc.
3+
.\" $COPYRIGHT$
4+
.de Vb
5+
.ft CW
6+
.nf
7+
..
8+
.de Ve
9+
.ft R
10+
11+
.fi
12+
..
13+
.TH "SHMEM\\_ALLTOALL" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#"
14+
.SH NAME
15+
16+
\fIshmem_alltoall32\fP(3),
17+
\fIshmem_alltoall64\fP(3),
18+
\fIshmem_alltoalls32\fP(3),
19+
\fIshmem_alltoalls64\fP(3)
20+
\- collective routine where each PE exchanges a fixed amount of data with all
21+
other PEs in the Active set
22+
.SH SYNOPSIS
23+
24+
C or C++:
25+
.Vb
26+
#include <mpp/shmem.h>
27+
28+
void shmem_alltoall32(void *target, const void *source,
29+
size_t nelems, int PE_start, int logPE_stride,
30+
int PE_size, long *pSync);
31+
32+
void shmem_alltoall64(void *target, const void *source,
33+
size_t nelems, int PE_start, int logPE_stride,
34+
int PE_size, long *pSync);
35+
36+
void shmem_alltoalls32(void *target, const void *source,
37+
ptrdiff_t dst, ptrdiff_t sst,
38+
size_t nelems, int PE_start, int logPE_stride,
39+
int PE_size, long *pSync);
40+
41+
void shmem_alltoalls64(void *target, const void *source,
42+
ptrdiff_t dst, ptrdiff_t sst,
43+
size_t nelems, int PE_start, int logPE_stride,
44+
int PE_size, long *pSync);
45+
.Ve
46+
Fortran:
47+
.Vb
48+
INCLUDE "mpp/shmem.fh"
49+
50+
INTEGER dst, sst, nelems, PE_start, logPE_stride, PE_size
51+
INTEGER pSync(SHMEM_ALLTOALL_SYNC_SIZE)
52+
53+
CALL SHMEM_ALLTOALL32(target, source, nelems,
54+
& PE_start, logPE_stride, PE_size, pSync)
55+
56+
CALL SHMEM_ALLTOALL64(target, source, nelems,
57+
& PE_start, logPE_stride, PE_size, pSync)
58+
59+
CALL SHMEM_ALLTOALLS32(target, source, dst, sst, nelems,
60+
& PE_start, logPE_stride, PE_size, pSync)
61+
62+
CALL SHMEM_ALLTOALLS64(target, source, dst, sst, nelems,
63+
& PE_start, logPE_stride, PE_size, pSync)
64+
.Ve
65+
.SH DESCRIPTION
66+
67+
.PP
68+
The shmem_alltoalls routines are collective routines. Each PE in the Active set exchanges nelems strided
69+
data elements of size 32 bits (for shmem_alltoalls32) or 64 bits (for shmem_alltoalls64) with all other PEs
70+
in the set. Both strides, dst and sst, must be greater than or equal to 1. The sst*jth block sent from PE i to
71+
PE j is placed in the dst*ith block of the dest data object on PE j.
72+
As with all OpenSHMEM collective routines, these routines assume that only PEs in the Active set call the
73+
routine. If a PE not in the Active set calls an OpenSHMEM collective routine, undefined behavior results.
74+
The values of arguments dst, sst, nelems, PE_start, logPE_stride, and PE_size must be equal on all PEs in
75+
the Active set. The same dest and source data objects, and the same pSync work array must be passed to all
76+
PEs in the Active set.
77+
Before any PE calls a shmem_alltoalls routine, the following conditions must exist (synchronization via
78+
a barrier or some other method is often needed to ensure this): The pSync array on all PEs in the Active set
79+
is not still in use from a prior call to a shmem_alltoalls routine. The dest data object on all PEs in the
80+
Active set is ready to accept the shmem_alltoalls data.
81+
Upon return from a shmem_alltoalls routine, the following is true for the local PE: Its dest symmetric data
82+
object is completely updated and the data has been copied out of the source data object. The values in the
83+
pSync array are restored to the original values.
84+
.PP
85+
The arguments are as follows:
86+
.TP
87+
A symmetric data object with one of the following data types:
88+
.RS
89+
.TP
90+
\fBshmem_alltoall32\fP: Any noncharacter type that
91+
has an element size of 32 bits. No Fortran derived types or C/C++ structures are allowed.
92+
.TP
93+
\fBshmem_alltoall64\fP: Any noncharacter type that has an element size
94+
of 64 bits. No Fortran derived types or C/C++ structures are allowed.
95+
.RE
96+
.RS
97+
.PP
98+
.RE
99+
.TP
target
100+
A symmetric data object large enough to receive the combined total of
101+
nelems elements from each PE in the Active set.
102+
.TP
103+
source
104+
A symmetric data object that contains nelems elements of data for each
105+
PE in the Active set, ordered according to destination PE.
106+
.TP
107+
dst
108+
The stride between consecutive elements of the dest data object. The
109+
stride is scaled by the element size. A value of 1 indicates contiguous
110+
data. dst must be of type ptrdiff_t. If you are using Fortran, it must be
111+
a default integer value.
112+
.TP
113+
sst
114+
The stride between consecutive elements of the source data object. The
115+
stride is scaled by the element size. A value of 1 indicates contiguous
116+
data. sst must be of type ptrdiff_t. If you are using Fortran, it must be a
117+
default integer value.
118+
.TP
119+
nelems
120+
The number of elements to exchange for each PE. nelems must be of
121+
type size_t for C/C++. If you are using Fortran, it must be a default
122+
integer value.
123+
.TP
124+
PE_start
125+
The lowest virtual PE number of the active set of PEs. PE_start must be of
126+
type integer. If you are using Fortran, it must be a default integer value.
127+
.TP
128+
logPE_stride
129+
The log (base 2) of the stride between consecutive virtual PE numbers in
130+
the active set. logPE_stride must be of type integer. If you are using Fortran, it must be a
131+
default integer value.
132+
.TP
133+
PE_size
134+
The number of PEs in the active set. PE_size must be of type integer. If you
135+
are using Fortran, it must be a default integer value.
136+
.PP
137+
.TP
138+
pSync
139+
A symmetric work array. In C/C++, pSync must be of type long and size
140+
_SHMEM_ALLTOALL_SYNC_SIZE.
141+
In Fortran, pSync must be of type integer and size SHMEM_ALLTOALL_SYNC_SIZE. Every
142+
element of this array must be initialized with the value _SHMEM_SYNC_VALUE (in C/C++)
143+
or SHMEM_SYNC_VALUE (in Fortran) before any of the PEs in the active set enter
144+
the alltoall routine.
145+
.PP
146+
Upon return from a shmem_alltoalls routine, the following is true for the local PE: Its dest
147+
symmetric data object is completely updated and the data has been copied out of the source
148+
data object. The values in the pSync array are restored to the original values.
149+
.PP
150+
The values of arguments dst, sst, nelems, PE_start, logPE_stride, and PE_size must be equal on
151+
all PEs in the active set. The same target and source data objects and the same pSync work
152+
array must be passed to all PEs in the active set.
153+
.PP
154+
Before any PE calls an alltoall routine, you must ensure that the following conditions exist
155+
(synchronization via a barrier or some other method is often needed to ensure this): The
156+
pSync array on all PEs in the active set is not still in use from a prior call to an alltoall
157+
routine. The target array on all PEs in the active set is ready to accept the alltoall data.
158+
.SH NOTES
159+
160+
The terms collective and symmetric are defined in \fIintro_shmem\fP(3)\&.
161+
.PP
162+
All SHMEM alltoall routines restore pSync to its original contents. Multiple calls to SHMEM
163+
routines that use the same pSync array do not require that pSync be reinitialized after the
164+
first call.
165+
.PP
166+
You must ensure that the pSync array is not being updated by any PE in the active set
167+
while any of the PEs participates in processing of a SHMEM alltoall routine. Be careful to
168+
avoid these situations: If the pSync array is initialized at run time, some type of
169+
synchronization is needed to ensure that all PEs in the working set have initialized pSync
170+
before any of them enter a SHMEM routine called with the pSync synchronization array. A
171+
pSync array may be reused on a subsequent SHMEM alltoall routine only if none of the PEs
172+
in the active set are still processing a prior SHMEM alltoall routine call that used the same
173+
pSync array. In general, this can be ensured only by doing some type of synchronization.
174+
However, in the special case of SHMEM routines being called with the same active set, you
175+
can allocate two pSync arrays and alternate between them on successive calls.
176+
.PP
177+
.SH EXAMPLES
178+
179+
.PP
180+
C/C++ example:
181+
.Vb
182+
#include <shmem.h>
183+
#include <stdio.h>
184+
long pSync[SHMEM_ALLTOALLS_SYNC_SIZE];
185+
int main(void)
186+
{
187+
int64_t *source, *dest;
188+
int i, count, pe;
189+
shmem_init();
190+
count = 2;
191+
dest = (int64_t*) shmem_malloc(count * shmem_n_pes() * sizeof(int64_t));
192+
source = (int64_t*) shmem_malloc(count * shmem_n_pes() * sizeof(int64_t));
193+
/* assign source values */
194+
for (pe=0; pe <shmem_n_pes(); pe++){
195+
for (i=0; i<count; i++){
196+
source[(pe*count)+i] = shmem_my_pe() + pe;
197+
dest[(pe*count)+i] = 9999;
198+
}
199+
}
200+
for (i=0; i< SHMEM_ALLTOALLS_SYNC_SIZE; i++) {
201+
pSync[i] = SHMEM_SYNC_VALUE;
202+
}
203+
/* wait for all PEs to initialize pSync */
204+
shmem_barrier_all();
205+
/* alltoalls on all PES */
206+
shmem_alltoalls64(dest, source, 1, 1, count, 0, 0, shmem_n_pes(), pSync);
207+
/* verify results */
208+
for (pe=0; pe<shmem_n_pes(); pe++) {
209+
for (i=0; i<count; i++){
210+
if (dest[(pe*count)+i] != shmem_my_pe() + pe) {
211+
printf("[%d] ERROR: dest[%d]=%ld, should be %d\n",
212+
shmem_my_pe(),(pe*count)+i,(long) dest[(pe*count)+i],
213+
shmem_my_pe() + pe);
214+
}
215+
}
216+
}
217+
shmem_barrier_all();
218+
shmem_free(dest);
219+
shmem_free(source);
220+
shmem_finalize();
221+
return 0;
222+
}
223+
.Ve
.PP
224+
.SH SEE ALSO
225+
226+
\fIintro_shmem\fP(3)
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
.so man3/shmem_alltoall32.3
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
.so man3/shmem_alltoall32.3
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
.so man3/shmem_alltoall32.3

0 commit comments

Comments
 (0)