|
| 1 | +.\" -*- nroff -*- |
| 2 | +.\" Copyright (c) 2016 Mellanox Technologies, Inc. |
| 3 | +.\" $COPYRIGHT$ |
| 4 | +.de Vb |
| 5 | +.ft CW |
| 6 | +.nf |
| 7 | +.. |
| 8 | +.de Ve |
| 9 | +.ft R |
| 10 | + |
| 11 | +.fi |
| 12 | +.. |
| 13 | +.TH "SHMEM\\_ALLTOALL" "3" "#OMPI_DATE#" "#PACKAGE_VERSION#" "#PACKAGE_NAME#" |
| 14 | +.SH NAME |
| 15 | + |
| 16 | +\fIshmem_alltoall32\fP(3), |
| 17 | +\fIshmem_alltoall64\fP(3), |
| 18 | +\fIshmem_alltoalls32\fP(3), |
| 19 | +\fIshmem_alltoalls64\fP(3) |
| 20 | +\- collective routine where each PE exchanges a fixed amount of data with all |
| 21 | +other PEs in the Active set |
| 22 | +.SH SYNOPSIS |
| 23 | + |
| 24 | +C or C++: |
| 25 | +.Vb |
| 26 | +#include <mpp/shmem.h> |
| 27 | + |
| 28 | +void shmem_alltoall32(void *target, const void *source, |
| 29 | + size_t nelems, int PE_start, int logPE_stride, |
| 30 | + int PE_size, long *pSync); |
| 31 | + |
| 32 | +void shmem_alltoall64(void *target, const void *source, |
| 33 | + size_t nelems, int PE_start, int logPE_stride, |
| 34 | + int PE_size, long *pSync); |
| 35 | + |
| 36 | +void shmem_alltoalls32(void *target, const void *source, |
| 37 | + ptrdiff_t dst, ptrdiff_t sst, |
| 38 | + size_t nelems, int PE_start, int logPE_stride, |
| 39 | + int PE_size, long *pSync); |
| 40 | + |
| 41 | +void shmem_alltoalls64(void *target, const void *source, |
| 42 | + ptrdiff_t dst, ptrdiff_t sst, |
| 43 | + size_t nelems, int PE_start, int logPE_stride, |
| 44 | + int PE_size, long *pSync); |
| 45 | +.Ve |
| 46 | +Fortran: |
| 47 | +.Vb |
| 48 | +INCLUDE "mpp/shmem.fh" |
| 49 | + |
| 50 | +INTEGER dst, sst, nelems, PE_start, logPE_stride, PE_size |
| 51 | +INTEGER pSync(SHMEM_ALLTOALL_SYNC_SIZE) |
| 52 | + |
| 53 | +CALL SHMEM_ALLTOALL32(target, source, nelems, |
| 54 | +& PE_start, logPE_stride, PE_size, pSync) |
| 55 | + |
| 56 | +CALL SHMEM_ALLTOALL64(target, source, nelems, |
| 57 | +& PE_start, logPE_stride, PE_size, pSync) |
| 58 | + |
| 59 | +CALL SHMEM_ALLTOALLS32(target, source, dst, sst, nelems, |
| 60 | +& PE_start, logPE_stride, PE_size, pSync) |
| 61 | + |
| 62 | +CALL SHMEM_ALLTOALLS64(target, source, dst, sst, nelems, |
| 63 | +& PE_start, logPE_stride, PE_size, pSync) |
| 64 | +.Ve |
| 65 | +.SH DESCRIPTION |
| 66 | + |
| 67 | +.PP |
| 68 | +The shmem_alltoalls routines are collective routines. Each PE in the Active set exchanges nelems strided |
| 69 | +data elements of size 32 bits (for shmem_alltoalls32) or 64 bits (for shmem_alltoalls64) with all other PEs |
| 70 | +in the set. Both strides, dst and sst, must be greater than or equal to 1. The sst*jth block sent from PE i to |
| 71 | +PE j is placed in the dst*ith block of the dest data object on PE j. |
| 72 | +As with all OpenSHMEM collective routines, these routines assume that only PEs in the Active set call the |
| 73 | +routine. If a PE not in the Active set calls an OpenSHMEM collective routine, undefined behavior results. |
| 74 | +The values of arguments dst, sst, nelems, PE_start, logPE_stride, and PE_size must be equal on all PEs in |
| 75 | +the Active set. The same dest and source data objects, and the same pSync work array must be passed to all |
| 76 | +PEs in the Active set. |
| 77 | +Before any PE calls a shmem_alltoalls routine, the following conditions must exist (synchronization via |
| 78 | +a barrier or some other method is often needed to ensure this): The pSync array on all PEs in the Active set |
| 79 | +is not still in use from a prior call to a shmem_alltoalls routine. The dest data object on all PEs in the |
| 80 | +Active set is ready to accept the shmem_alltoalls data. |
| 81 | +Upon return from a shmem_alltoalls routine, the following is true for the local PE: Its dest symmetric data |
| 82 | +object is completely updated and the data has been copied out of the source data object. The values in the |
| 83 | +pSync array are restored to the original values. |
| 84 | +.PP |
| 85 | +The arguments are as follows: |
| 86 | +.TP |
| 87 | +A symmetric data object with one of the following data types: |
| 88 | +.RS |
| 89 | +.TP |
| 90 | +\fBshmem_alltoall32\fP: Any noncharacter type that |
| 91 | +has an element size of 32 bits. No Fortran derived types or C/C++ structures are allowed. |
| 92 | +.TP |
| 93 | +\fBshmem_alltoall64\fP: Any noncharacter type that has an element size |
| 94 | +of 64 bits. No Fortran derived types or C/C++ structures are allowed. |
| 95 | +.RE |
| 96 | +.RS |
| 97 | +.PP |
| 98 | +.RE |
| 99 | +target |
| 100 | +A symmetric data object large enough to receive the combined total of |
| 101 | +nelems elements from each PE in the Active set. |
| 102 | +.TP |
| 103 | +source |
| 104 | +A symmetric data object that contains nelems elements of data for each |
| 105 | +PE in the Active set, ordered according to destination PE. |
| 106 | +.TP |
| 107 | +dst |
| 108 | +The stride between consecutive elements of the dest data object. The |
| 109 | +stride is scaled by the element size. A value of 1 indicates contiguous |
| 110 | +data. dst must be of type ptrdiff_t. If you are using Fortran, it must be |
| 111 | +a default integer value. |
| 112 | +.TP |
| 113 | +sst |
| 114 | +The stride between consecutive elements of the source data object. The |
| 115 | +stride is scaled by the element size. A value of 1 indicates contiguous |
| 116 | +data. sst must be of type ptrdiff_t. If you are using Fortran, it must be a |
| 117 | +default integer value. |
| 118 | +.TP |
| 119 | +nelems |
| 120 | +The number of elements to exchange for each PE. nelems must be of |
| 121 | +type size_t for C/C++. If you are using Fortran, it must be a default |
| 122 | +integer value. |
| 123 | +.TP |
| 124 | +PE_start |
| 125 | +The lowest virtual PE number of the active set of PEs. PE_start must be of |
| 126 | +type integer. If you are using Fortran, it must be a default integer value. |
| 127 | +.TP |
| 128 | +logPE_stride |
| 129 | +The log (base 2) of the stride between consecutive virtual PE numbers in |
| 130 | +the active set. logPE_stride must be of type integer. If you are using Fortran, it must be a |
| 131 | +default integer value. |
| 132 | +.TP |
| 133 | +PE_size |
| 134 | +The number of PEs in the active set. PE_size must be of type integer. If you |
| 135 | +are using Fortran, it must be a default integer value. |
| 136 | +.PP |
| 137 | +.TP |
| 138 | +pSync |
| 139 | +A symmetric work array. In C/C++, pSync must be of type long and size |
| 140 | +_SHMEM_ALLTOALL_SYNC_SIZE. |
| 141 | +In Fortran, pSync must be of type integer and size SHMEM_ALLTOALL_SYNC_SIZE. Every |
| 142 | +element of this array must be initialized with the value _SHMEM_SYNC_VALUE (in C/C++) |
| 143 | +or SHMEM_SYNC_VALUE (in Fortran) before any of the PEs in the active set enter |
| 144 | +the alltoall routine. |
| 145 | +.PP |
| 146 | +Upon return from a shmem_alltoalls routine, the following is true for the local PE: Its dest |
| 147 | +symmetric data object is completely updated and the data has been copied out of the source |
| 148 | +data object. The values in the pSync array are restored to the original values. |
| 149 | +.PP |
| 150 | +The values of arguments dst, sst, nelems, PE_start, logPE_stride, and PE_size must be equal on |
| 151 | +all PEs in the active set. The same target and source data objects and the same pSync work |
| 152 | +array must be passed to all PEs in the active set. |
| 153 | +.PP |
| 154 | +Before any PE calls an alltoall routine, you must ensure that the following conditions exist |
| 155 | +(synchronization via a barrier or some other method is often needed to ensure this): The |
| 156 | +pSync array on all PEs in the active set is not still in use from a prior call to an alltoall |
| 157 | +routine. The target array on all PEs in the active set is ready to accept the alltoall data. |
| 158 | +.SH NOTES |
| 159 | + |
| 160 | +The terms collective and symmetric are defined in \fIintro_shmem\fP(3)\&. |
| 161 | +.PP |
| 162 | +All SHMEM alltoall routines restore pSync to its original contents. Multiple calls to SHMEM |
| 163 | +routines that use the same pSync array do not require that pSync be reinitialized after the |
| 164 | +first call. |
| 165 | +.PP |
| 166 | +You must ensure that the pSync array is not being updated by any PE in the active set |
| 167 | +while any of the PEs participates in processing of a SHMEM alltoall routine. Be careful to |
| 168 | +avoid these situations: If the pSync array is initialized at run time, some type of |
| 169 | +synchronization is needed to ensure that all PEs in the working set have initialized pSync |
| 170 | +before any of them enter a SHMEM routine called with the pSync synchronization array. A |
| 171 | +pSync array may be reused on a subsequent SHMEM alltoall routine only if none of the PEs |
| 172 | +in the active set are still processing a prior SHMEM alltoall routine call that used the same |
| 173 | +pSync array. In general, this can be ensured only by doing some type of synchronization. |
| 174 | +However, in the special case of SHMEM routines being called with the same active set, you |
| 175 | +can allocate two pSync arrays and alternate between them on successive calls. |
| 176 | +.PP |
| 177 | +.SH EXAMPLES |
| 178 | + |
| 179 | +.PP |
| 180 | +C/C++ example: |
| 181 | +.Vb |
| 182 | +#include <shmem.h> |
| 183 | +#include <stdio.h> |
| 184 | +long pSync[SHMEM_ALLTOALLS_SYNC_SIZE]; |
| 185 | +int main(void) |
| 186 | +{ |
| 187 | +int64_t *source, *dest; |
| 188 | +int i, count, pe; |
| 189 | +shmem_init(); |
| 190 | +count = 2; |
| 191 | +dest = (int64_t*) shmem_malloc(count * shmem_n_pes() * sizeof(int64_t)); |
| 192 | +source = (int64_t*) shmem_malloc(count * shmem_n_pes() * sizeof(int64_t)); |
| 193 | +/* assign source values */ |
| 194 | +for (pe=0; pe <shmem_n_pes(); pe++){ |
| 195 | +for (i=0; i<count; i++){ |
| 196 | +source[(pe*count)+i] = shmem_my_pe() + pe; |
| 197 | +dest[(pe*count)+i] = 9999; |
| 198 | +} |
| 199 | +} |
| 200 | +for (i=0; i< SHMEM_ALLTOALLS_SYNC_SIZE; i++) { |
| 201 | +pSync[i] = SHMEM_SYNC_VALUE; |
| 202 | +} |
| 203 | +/* wait for all PEs to initialize pSync */ |
| 204 | +shmem_barrier_all(); |
| 205 | +/* alltoalls on all PES */ |
| 206 | +shmem_alltoalls64(dest, source, 1, 1, count, 0, 0, shmem_n_pes(), pSync); |
| 207 | +/* verify results */ |
| 208 | +for (pe=0; pe<shmem_n_pes(); pe++) { |
| 209 | +for (i=0; i<count; i++){ |
| 210 | +if (dest[(pe*count)+i] != shmem_my_pe() + pe) { |
| 211 | +printf("[%d] ERROR: dest[%d]=%ld, should be %d\n", |
| 212 | +shmem_my_pe(),(pe*count)+i,dest[(pe*count)+i], |
| 213 | +shmem_my_pe() + pe); |
| 214 | +} |
| 215 | +} |
| 216 | +} |
| 217 | +shmem_barrier_all(); |
| 218 | +shmem_free(dest); |
| 219 | +shmem_free(source); |
| 220 | +shmem_finalize(); |
| 221 | +return 0; |
| 222 | +} |
| 223 | +.Ve |
| 224 | +.SH SEE ALSO |
| 225 | + |
| 226 | +\fIintro_shmem\fP(3) |
0 commit comments