@@ -192,7 +192,7 @@ a 3D array in parallel using hierarchical parallelism.
192192 Array3DReal A ("A", N1, N2, N3);
193193 parallelForOuter(
194194 {N1, N2}, KOKKOS_LAMBDA(int J1, int J2, const TeamMember &Team) {
195- parallelForInner(Team, N3, INNER_LAMBDA(Int J3) {
195+ parallelForInner(Team, N3, INNER_LAMBDA(int J3) {
196196 A(J1, J2, J3) = J1 + J2 + J3;
197197 });
198198 });
@@ -204,7 +204,7 @@ diagonal of a square matrix one can do:
204204 Array2DReal M("M", N, N);
205205 parallelForOuter(
206206 {N}, KOKKOS_LAMBDA(int J1, const TeamMember &Team) {
207- parallelForInner(Team, J1, INNER_LAMBDA(Int J2) {
207+ parallelForInner(Team, J1, INNER_LAMBDA(int J2) {
208208 M(J1, J2) = J1 + J2;
209209 });
210210 });
@@ -220,7 +220,7 @@ in a 2D array might be done as follows.
220220 parallelForOuter(
221221 {N1, N2}, KOKKOS_LAMBDA(int J1, int J2, const TeamMember &Team) {
222222 Real SumD3;
223- parallelReduceInner(Team, N3, INNER_LAMBDA(Int J3, Real &Accum) {
223+ parallelReduceInner(Team, N3, INNER_LAMBDA(int J3, Real &Accum) {
224224 Accum += A(J1, J2, J3);
225225 }, SumD3);
226226 B(J1, J2) = SumD3;
@@ -234,10 +234,10 @@ For example, to additionally compute and store maxima along the third dimension
234234 parallelForOuter(
235235 {N1, N2}, KOKKOS_LAMBDA(int J1, int J2, const TeamMember &Team) {
236236 Real SumD3, MaxD3;
237- parallelReduceInner(Team, N3, INNER_LAMBDA(Int J3, Real &AccumSum, Real &AccumMax) {
237+ parallelReduceInner(Team, N3, INNER_LAMBDA(int J3, Real &AccumSum, Real &AccumMax) {
238238 AccumSum += A(J1, J2, J3);
239- AccumMax = Kokkos::Max (AccumMax, A(J1, J2, J3));
240- }, SumN3, MaxN3 );
239+ AccumMax = Kokkos::max (AccumMax, A(J1, J2, J3));
240+ }, SumD3, Kokkos::Max<Real>(MaxD3) );
241241 B(J1, J2) = SumD3;
242242 C(J1, J2) = MaxD3;
243243 });
@@ -254,7 +254,7 @@ be done as follows.
254254 Array3DReal D("D", N1, N2, N3);
255255 parallelForOuter(
256256 {N1, N2}, KOKKOS_LAMBDA(int J1, int J2, const TeamMember &Team) {
257- parallelScanInner(Team, N1 , INNER_LAMBDA(Int J3, Real &Accum, bool IsFinal) {
257+ parallelScanInner(Team, N3 , INNER_LAMBDA(int J3, Real &Accum, bool IsFinal) {
258258 Accum += A(J1, J2, J3);
259259 if (IsFinal) {
260260 D(J1, J2, J3) = Accum;
@@ -267,7 +267,7 @@ before the `if` statement. That is, it performs an inclusive scan. To compute an
simply move the addition after the `if` statement.
268268```c++
269269 Real FinalScanValue;
270- parallelScanInner(Team, N1 , INNER_LAMBDA(Int J3, Real &Accum, bool IsFinal) {
270+ parallelScanInner(Team, N3 , INNER_LAMBDA(int J3, Real &Accum, bool IsFinal) {
271271 if (IsFinal) {
272272 D(J1, J2, J3) = Accum;
273273 }
@@ -280,19 +280,19 @@ and only one-dimensional index range can be used. In contrast to `parallelReduce
`parallelScanInner` supports only sum-based scans and only one scan variable.
281281
282282### parallelSearchInner
283- To search an index range in parallel for the first index where a given condition occurs Omega
284- provides the ` parallelSearchInner ` function.
283+ To search an index range in parallel for the first index at which a given condition occurs,
284+ Omega provides the ` parallelSearchInner ` function.
For example, the following code finds, for each row of a matrix, the first column index where
the matrix element is above a certain threshold. If no element matches the condition, then
`parallelSearchInner` returns `-1`.
288288``` c++
289289 Array2DReal M ("M", N1, N2);
290- Array1DI3 ThresholdIdx("ThresholdIdx", N1);
290+ Array1DI4 ThresholdIdx("ThresholdIdx", N1);
291291 parallelForOuter(
292292 {N1}, KOKKOS_LAMBDA(int J1, const TeamMember &Team) {
293293
294294 int Idx;
295- parallelSearchInner(Team, N2, INNER_LAMBDA(Int J2) {
295+ parallelSearchInner(Team, N2, INNER_LAMBDA(int J2) {
296296 return M(J1, J2) > Threshold;
297297 }, Idx);
298298
0 commit comments